]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
ggml : add ops SOFTPLUS, EXPM1, TRI, SOLVE_TRI, CUMSUM (#17063)
authorPiotr Wilkin (ilintar) <redacted>
Thu, 13 Nov 2025 18:54:47 +0000 (19:54 +0100)
committerGitHub <redacted>
Thu, 13 Nov 2025 18:54:47 +0000 (20:54 +0200)
* Add ops needed for new hybrid models: SOFTPLUS, EXPM1, TRI, SOLVE_TRI, CUMSUM

* Update ggml/include/ggml.h

Co-authored-by: Georgi Gerganov <redacted>
* Update tests/test-backend-ops.cpp

Co-authored-by: Georgi Gerganov <redacted>
* Code review

* Whitespace

* Update tests/test-backend-ops.cpp

Co-authored-by: Diego Devesa <redacted>
* This is actually sigmoid, duh.

* Add CONST, remove TRI_KEEP, other changes from review

* Update tests/test-backend-ops.cpp

Co-authored-by: Georgi Gerganov <redacted>
* Update ggml/src/ggml.c

Co-authored-by: Georgi Gerganov <redacted>
* Update ggml/src/ggml.c

Co-authored-by: Georgi Gerganov <redacted>
* Update ggml/src/ggml-cuda/unary.cu

Co-authored-by: Aman Gupta <redacted>
* Remove extra script

* Update ggml/src/ggml.c

Co-authored-by: Diego Devesa <redacted>
* Update tests/test-backend-ops.cpp

Co-authored-by: Diego Devesa <redacted>
* moving changes from laptop [no ci]

* pre-rebase

* Update tests/test-backend-ops.cpp

Co-authored-by: Sigbjørn Skjæret <redacted>
* Update tests/test-backend-ops.cpp

Co-authored-by: Sigbjørn Skjæret <redacted>
* Refactor tests

* ggml : cleanup

* cont : fix ggml_fill srcs

* tests : add note

* ggml : add ggml_fill_inplace

* ggml : add asserts

* ggml : fix ggml_fill constant cast

* cont : ggml_tri minor

* Use TENSOR_LOCALS

* Fix regression from #14596, regenerate

* Don't make commits at night...

---------

Co-authored-by: Georgi Gerganov <redacted>
Co-authored-by: Diego Devesa <redacted>
Co-authored-by: Aman Gupta <redacted>
Co-authored-by: Sigbjørn Skjæret <redacted>
16 files changed:
docs/ops.md
docs/ops/CPU.csv
docs/ops/CUDA.csv
ggml/include/ggml.h
ggml/src/ggml-cpu/ggml-cpu.c
ggml/src/ggml-cpu/ops.cpp
ggml/src/ggml-cpu/ops.h
ggml/src/ggml-cpu/unary-ops.cpp
ggml/src/ggml-cpu/unary-ops.h
ggml/src/ggml-cpu/vec.h
ggml/src/ggml-cuda/ggml-cuda.cu
ggml/src/ggml-cuda/unary.cu
ggml/src/ggml-cuda/unary.cuh
ggml/src/ggml-impl.h
ggml/src/ggml.c
tests/test-backend-ops.cpp

index 0c4f7ef5c811b13dea57a1ba66360c844ae92890..68722fc9009b510e7ebd4bb330f366a6b1b37485 100644 (file)
@@ -18,7 +18,7 @@ Legend:
 |                              ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                              ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
 |                             ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
-|                           ADD_ID | â\9d\8c | â\9d\8c | â\9d\8c | â\9d\8c | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                           ADD_ID | â\9d\8c | â\9d\8c | â\9c\85 | â\9c\85 | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                           ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ |
 |                           ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                          ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
@@ -26,9 +26,9 @@ Legend:
 |                            CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
 |                           CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ |
 |                             CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
-|                          CONV_2D | ❌ | ❌ | ✅ | 🟡 | ❌ | ✅ | ❌ | ✅ | ❌ |
+|                          CONV_2D | ❌ | ❌ | ✅ |  | ❌ | ✅ | ❌ | ✅ | ❌ |
 |                       CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
-|                          CONV_3D | â\9d\8c | â\9d\8c | â\9d\8c | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                          CONV_3D | â\9d\8c | â\9d\8c | â\9c\85 | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                              COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
@@ -36,11 +36,14 @@ Legend:
 |                              CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
 |               CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |          CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                           CUMSUM | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                    DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
 |                              DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
 |                              DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
 |                              ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 |                              EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
+|                            EXPM1 | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                             FILL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                   FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ |
 |                            FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
 |                GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
@@ -57,7 +60,7 @@ Legend:
 |                      HARDSIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 |                        HARDSWISH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 |                           IM2COL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ |
-|                        IM2COL_3D | â\9d\8c | â\9d\8c | â\9d\8c | â\9d\8c | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                        IM2COL_3D | â\9d\8c | â\9d\8c | â\9c\85 | â\9c\85 | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                          L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                       LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                              LOG | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
@@ -69,10 +72,10 @@ Legend:
 |                             NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
 |                     NORM_MUL_ADD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
 |                   OPT_STEP_ADAMW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
-|                     OPT_STEP_SGD | â\9d\8c | â\9d\8c | â\9d\8c | â\9d\8c | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                     OPT_STEP_SGD | â\9d\8c | â\9d\8c | â\9c\85 | â\9c\85 | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                         OUT_PROD | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
-|                              PAD | ❌ | ✅ | ✅ |  | ✅ | ✅ | 🟡 | ✅ | ❌ |
-|                   PAD_REFLECT_1D | â\9d\8c | â\9c\85 | â\9c\85 | â\9d\8c | ✅ | ❌ | ✅ | ❌ | ❌ |
+|                              PAD | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ |
+|                   PAD_REFLECT_1D | â\9d\8c | â\9c\85 | â\9c\85 | â\9c\85 | ✅ | ❌ | ✅ | ❌ | ❌ |
 |                          POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                            REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
 |                             RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
@@ -80,15 +83,15 @@ Legend:
 |                      REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
 |                         RMS_NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ |
 |                    RMS_NORM_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
-|                 RMS_NORM_MUL_ADD | â\9d\8c | â\9c\85 | â\9c\85 | â\9c\85 | ✅ | ✅ | ✅ | ✅ | ❌ |
-|                             ROLL | â\9d\8c | â\9d\8c | â\9c\85 | â\9d\8c | ❌ | ❌ | ✅ | ✅ | ❌ |
+|                 RMS_NORM_MUL_ADD | â\9d\8c | â\9c\85 | â\9d\8c | â\9d\8c | ✅ | ✅ | ✅ | ✅ | ❌ |
+|                             ROLL | â\9d\8c | â\9d\8c | â\9c\85 | â\9c\85 | ❌ | ❌ | ✅ | ✅ | ❌ |
 |                             ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
 |                        ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
 |                            ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
 |                        RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                        RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                            SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
-|                              SET | â\9d\8c | â\9d\8c | â\9c\85 | â\9d\8c | ✅ | ❌ | 🟡 | ❌ | ❌ |
+|                              SET | â\9d\8c | â\9d\8c | â\9c\85 | â\9c\85 | ✅ | ❌ | 🟡 | ❌ | ❌ |
 |                         SET_ROWS | ❌ | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
 |                              SGN | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 |                          SIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
@@ -96,21 +99,24 @@ Legend:
 |                        SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
 |                              SIN | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
 |                          SOFTCAP | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
+|                         SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                         SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
 |                    SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
+|                        SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                              SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
 |                             SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ❌ | ❌ |
 |                         SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 |                         SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |
 |                             STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
 |                              SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
-|                              SUM | ❌ | ✅ | ✅ |  | ❌ | ❌ | 🟡 | ✅ | ❌ |
-|                         SUM_ROWS | ❌ | ✅ | ✅ |  | ✅ | ✅ | 🟡 | ✅ | ❌ |
+|                              SUM | ❌ | ✅ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
+|                         SUM_ROWS | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ |
 |                           SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
-|                       SWIGLU_OAI | â\9d\8c | â\9d\8c | â\9d\8c | â\9d\8c | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                       SWIGLU_OAI | â\9d\8c | â\9d\8c | â\9c\85 | â\9c\85 | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                             TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
 |               TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
 |                         TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
+|                              TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
 |                            TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
 |                          UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
-|                            XIELU | â\9d\8c | â\9d\8c | â\9d\8c | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+|                            XIELU | â\9d\8c | â\9d\8c | â\9c\85 | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
index 1820028c9a2fe1129ab51ed7d397aa83e2bd65f2..7c223e279c97e2ee3050130993d216ca34780345 100644 (file)
 "CPU","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","EXPM1","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","EXPM1","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","EXPM1","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","EXPM1","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
-"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
-"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
-"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
-"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","EXPM1","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","EXPM1","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","EXPM1","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","EXPM1","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
 "CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
-"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
-"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
-"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
-"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
-"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CPU"
 "CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CPU"
 "CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CPU"
 "CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","yes","CPU"
 "CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","yes","CPU"
 "CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
-"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CPU"
-"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","1","yes","CPU"
+"CPU","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=1,m=8,r=2,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CPU"
+"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CPU"
+"CPU","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CPU"
 "CPU","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CPU"
 "CPU","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CPU"
 "CPU","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CPU"
 "CPU","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","no","CPU"
+"CPU","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=0","support","0","no","CPU"
+"CPU","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=1","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","no","CPU"
 "CPU","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
-"CPU","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CPU"
+"CPU","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CPU"
 "CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","yes","CPU"
 "CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","yes","CPU"
 "CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","yes","CPU"
 "CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CPU"
 "CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CPU"
 "CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
-"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CPU"
+"CPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f32,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
+"CPU","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],type_kernel=f16,stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CPU"
 "CPU","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","yes","CPU"
 "CPU","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CPU"
 "CPU","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CPU"
 "CPU","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","1","yes","CPU"
+"CPU","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","1","yes","CPU"
 "CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CPU"
 "CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CPU"
 "CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CPU"
 "CPU","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CPU"
 "CPU","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CPU"
 "CPU","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CPU"
+"CPU","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","CPU"
 "CPU","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","CPU"
 "CPU","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","yes","CPU"
 "CPU","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","yes","CPU"
 "CPU","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","1","yes","CPU"
 "CPU","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","1","yes","CPU"
 "CPU","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
-"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=mxfp4,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,3],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
+"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
 "CPU","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CPU"
 "CPU","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CPU"
 "CPU","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CPU"
 "CPU","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CPU"
 "CPU","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CPU"
+"CPU","ADD","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","SUB","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","MUL","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","DIV","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","ADD","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","SUB","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","MUL","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","DIV","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CPU"
 "CPU","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CPU"
+"CPU","ADD","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","SUB","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","MUL","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","DIV","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=2","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[16,5,4,3],nr=[1,2,1,1],nf=3","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=4","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[16,5,4,3],nr=[1,1,1,2],nf=5","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=6","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=7","support","1","yes","CPU"
-"CPU","ADD","type=f32,ne=[16,5,4,3],nr=[2,2,2,2],nf=8","support","1","yes","CPU"
+"CPU","ADD","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","SUB","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
+"CPU","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CPU"
 "CPU","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
-"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000","support","1","yes","CPU"
-"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000","support","1","yes","CPU"
+"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CPU"
+"CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","CPU"
+"CPU","SCALE","type=f32,ne=[100,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CPU"
 "CPU","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000,inplace=0","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000,inplace=0","support","1","yes","CPU"
 "CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","yes","CPU"
 "CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=0","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001,inplace=0","support","1","yes","CPU"
 "CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CPU"
 "CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100,inplace=0","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100,inplace=0","support","1","yes","CPU"
 "CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","yes","CPU"
 "CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000,inplace=0","support","1","yes","CPU"
 "CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CPU"
-"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000,inplace=0","support","1","yes","CPU"
 "CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","yes","CPU"
 "CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","CPU"
-"CPU","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","CPU"
+"CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","CPU"
 "CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CPU"
 "CPU","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CPU"
 "CPU","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CPU"
 "CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CPU"
 "CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CPU"
 "CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CPU"
-"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=64,n=77,k=77,bs=[12,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=576,n=512,k=576,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CPU"
+"CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=0,m=32,n=8192,k=64","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=50,n=200,k=64","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=1,m=32,n=8192,k=64","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CPU"
+"CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CPU"
 "CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CPU"
 "CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
+"CPU","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CPU"
 "CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CPU"
 "CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CPU"
+"CPU","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CPU"
 "CPU","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","CPU"
 "CPU","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","CPU"
 "CPU","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","CPU"
 "CPU","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
 "CPU","COS","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
 "CPU","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CPU"
+"CPU","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","COS","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CPU"
+"CPU","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CPU"
+"CPU","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
 "CPU","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
 "CPU","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CPU"
 "CPU","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
 "CPU","SIN","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
 "CPU","COS","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
 "CPU","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CPU"
+"CPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
+"CPU","SQR","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","SQRT","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","LOG","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","SIN","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","COS","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CPU"
+"CPU","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CPU"
+"CPU","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
+"CPU","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
 "CPU","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CPU"
 "CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CPU"
 "CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
-"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
+"CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
 "CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
-"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CPU"
+"CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CPU"
+"CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CPU"
 "CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","CPU"
 "CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","CPU"
 "CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","CPU"
 "CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CPU"
 "CPU","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CPU"
 "CPU","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CPU"
+"CPU","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CPU"
+"CPU","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CPU"
+"CPU","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","CPU"
 "CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CPU"
 "CPU","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CPU"
 "CPU","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CPU"
+"CPU","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CPU"
+"CPU","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","CPU"
+"CPU","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","CPU"
 "CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CPU"
 "CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CPU"
-"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","yes","CPU"
-"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=0","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=nearest,flags=none","support","1","yes","CPU"
 "CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","1","yes","CPU"
 "CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","1","yes","CPU"
-"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","1","yes","CPU"
-"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","yes","CPU"
-"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=0","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=align_corners","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bicubic,flags=align_corners","support","1","yes","CPU"
+"CPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bicubic,flags=align_corners","support","1","yes","CPU"
 "CPU","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
-"CPU","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[10,5,4,3],permute=0,slice=0","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[11,5,6,3],permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[11,5,6,3],permute=[0,3,2,1]","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[11,5,6,3],permute=[0,1,3,2]","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=0","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=0,slice=1","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=1","support","1","yes","CPU"
 "CPU","MEAN","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[33,1,1,1]","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[33,1,1,1],permute=0,slice=0","support","1","yes","CPU"
+"CPU","MEAN","type=f32,ne=[33,1,1,1]","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[33,1024,1,1]","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[33,1024,1,1],permute=0,slice=0","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[33,256,1,1]","support","1","yes","CPU"
+"CPU","SUM","type=f32,ne=[33,256,1,1],permute=[1,0,2,3]","support","1","yes","CPU"
+"CPU","SUM_ROWS","type=f32,ne=[33,256,1,1],permute=0,slice=0","support","1","yes","CPU"
+"CPU","MEAN","type=f32,ne=[33,256,1,1]","support","1","yes","CPU"
+"CPU","MEAN","type=f32,ne=[32769,1,1,1]","support","1","yes","CPU"
 "CPU","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CPU"
 "CPU","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CPU"
 "CPU","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CPU"
 "CPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CPU"
+"CPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CPU"
 "CPU","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CPU"
+"CPU","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","CPU"
 "CPU","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CPU"
 "CPU","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CPU"
 "CPU","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CPU"
 "CPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
-"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","CUMSUM","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
+"CPU","XIELU","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
+"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","1","yes","CPU"
+"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","1","yes","CPU"
+"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","1","yes","CPU"
+"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","1","yes","CPU"
+"CPU","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","1","yes","CPU"
+"CPU","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","CPU"
+"CPU","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","1","yes","CPU"
+"CPU","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","1","yes","CPU"
+"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CPU"
+"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CPU"
+"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","CPU"
+"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CPU"
+"CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CPU"
 "CPU","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
 "CPU","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","yes","CPU"
 "CPU","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
 "CPU","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CPU"
 "CPU","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
+"CPU","OPT_STEP_SGD","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
index 4589bb51e0ecdf4c16e48f1749442adac707d92d..56ba32273a70c53780d396a170d3b109dc715130 100644 (file)
 "CUDA0","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
 "CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","EXPM1","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","EXPM1","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
 "CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","EXPM1","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","EXPM1","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
 "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
 "CUDA0","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
 "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","EXPM1","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","EXPM1","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
 "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
 "CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","EXPM1","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","EXPM1","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
 "CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
+"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
+"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
 "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","CUDA"
 "CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","CUDA"
 "CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","CUDA"
 "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","yes","CUDA"
 "CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","yes","CUDA"
 "CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","0","no","CUDA"
-"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","yes","CUDA"
-"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=2.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=0.500000,limit=7.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=2.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=0,alpha=1.702000,limit=7.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=2.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=0.500000,limit=7.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=2.000000","support","1","yes","CUDA"
+"CUDA0","SWIGLU_OAI","type=f32,ne_a=[128,2,2,2],v=1,alpha=1.702000,limit=7.000000","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=76800,m=5,r=4,be1=1,be2=2,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=256,m=80000,r=70000,be1=2,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=700,be2=100,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=mxfp4,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=1,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,be1=7,be2=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=1,be2=1,v=1","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=0","support","1","yes","CUDA"
+"CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,be1=7,be2=1,v=1","support","1","yes","CUDA"
 "CUDA0","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","yes","CUDA"
 "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","yes","CUDA"
 "CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","yes","CUDA"
 "CUDA0","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
+"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
+"CUDA0","GET_ROWS_BACK","type=mxfp4,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","no","CUDA"
 "CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
-"CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f32,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=f16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=bf16,type_idx=i64,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q4_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q5_1,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q3_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q4_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q5_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=q6_K,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq2_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_xxs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq1_m,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq4_nl,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq3_s,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","CUDA"
+"CUDA0","SET_ROWS","type=iq4_xs,type_idx=i64,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","CUDA"
 "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","yes","CUDA"
 "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","yes","CUDA"
 "CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","yes","CUDA"
 "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA"
 "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","CUDA"
 "CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=1,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,1,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,2,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,3,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[1,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[2,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[3,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,1,2],ne_kernel=[11,11,1,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,1],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,1,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,2,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,1,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,3,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[1,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[2,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[1,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[3,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D","ne_input=[141,133,25,2],ne_kernel=[11,11,25,12],stride0=3,stride1=5,padding0=5,padding1=5,dilation0=2,dilation1=4,cwhn=0","support","0","no","CUDA"
-"CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","yes","CUDA"
-"CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CUDA"
-"CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CUDA"
-"CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","CUDA"
-"CUDA0","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","CUDA"
-"CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CUDA"
-"CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CUDA"
-"CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CUDA"
-"CUDA0","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","CUDA"
-"CUDA0","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","yes","CUDA"
-"CUDA0","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","yes","CUDA"
-"CUDA0","ARGMAX","type=f32,ne=[2000,10,1,1]","support","1","yes","CUDA"
-"CUDA0","ARGMAX","type=f32,ne=[5438,3,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,2,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,2,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","0","no","CUDA"
-"CUDA0","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","0","no","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","yes","CUDA"
-"CUDA0","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","no","CUDA"
-"CUDA0","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","no","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=0","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=1","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=1","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=1","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=1","support","1","yes","CUDA"
-"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=1","support","1","yes","CUDA"
-"CUDA0","DUP","type=f32,ne=[10,10,20,1]","support","1","yes","CUDA"
-"CUDA0","DUP","type=f16,ne=[10,10,20,1]","support","1","yes","CUDA"
-"CUDA0","DUP","type=i32,ne=[10,10,20,1]","support","0","no","CUDA"
-"CUDA0","DUP","type=i16,ne=[10,10,20,1]","support","0","no","CUDA"
-"CUDA0","DUP","type=f32,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","CUDA"
-"CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","CUDA"
-"CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","no","CUDA"
-"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","no","CUDA"
-"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","no","CUDA"
-"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","no","CUDA"
-"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","0","no","CUDA"
-"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","0","no","CUDA"
-"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","no","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","CUDA"
-"CUDA0","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","CUDA"
-"CUDA0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","CUDA"
-"CUDA0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","CUDA"
-"CUDA0","CONT","type=bf16,ne=[2,3,5,7]","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=2","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,2,1,1],nf=3","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=4","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[1,1,1,2],nf=5","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=6","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=7","support","1","yes","CUDA"
-"CUDA0","ADD","type=f32,ne=[16,5,4,3],nr=[2,2,2,2],nf=8","support","1","yes","CUDA"
-"CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000","support","1","yes","CUDA"
-"CUDA0","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","yes","CUDA"
-"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA"
-"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","yes","CUDA"
-"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA"
-"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA"
-"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","yes","CUDA"
-"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=0","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000000,broadcast=1","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=0","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000001,broadcast=1","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=0","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.000100,broadcast=1","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=0","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=0.100000,broadcast=1","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","CUDA"
-"CUDA0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","CUDA"
-"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","CUDA"
-"CUDA0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","CUDA"
-"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA"
-"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","no","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","yes","CUDA"
-"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
-"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
-"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
-"CUDA0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","CUDA"
-"CUDA0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
-"CUDA0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CUDA"
-"CUDA0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SIN","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","COS","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
-"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CUDA"
-"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CUDA"
-"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","0","no","CUDA"
-"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","CUDA"
-"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","0","no","CUDA"
-"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA"
-"CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CUDA"
-"CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CUDA"
-"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CUDA"
-"CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CUDA"
-"CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=0","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","yes","CUDA"
-"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","yes","CUDA"
-"CUDA0","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","MEAN","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CUDA"
-"CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CUDA"
-"CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CUDA"
-"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CUDA"
-"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","CUDA"
-"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","CUDA"
-"CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CUDA"
-"CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CUDA"
-"CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
-"CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA"
-"CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA"
-"CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","CUDA"
-"CUDA0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","CUDA"
-"CUDA0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
-"CUDA0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=1,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=1,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=1,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=0,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=0,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=0,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=1,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=1,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=1,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=1,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=0","support","1","yes","CUDA"
+"CUDA0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,10,3],ne_kernel=[3,3,3,3],IC=3,s0=3,s1=3,s2=3,p0=3,p1=3,p2=3,d0=3,d1=3,d2=3,v=1","support","1","yes","CUDA"
 "CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA"
 "CUDA0","CONV_2D","ne_input=[1,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f16,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA"
 "CUDA0","CONV_2D","ne_input=[141,1,1,2],ne_kernel=[1,1,1,1],type_kernel=f32,stride0=1,stride1=5,padding0=5,padding1=2,dilation0=2,dilation1=4,cwhn=0","support","1","yes","CUDA"
 "CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","yes","CUDA"
 "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","yes","CUDA"
 "CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","yes","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f32","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=1,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=1,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=0,p1=0,p2=0,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=3,KW=3,s0=2,s1=2,s2=2,p0=1,p1=1,p2=1,d0=2,d1=2,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=2,IC=3,ID=18,IH=22,IW=20,OC=4,KD=3,KH=1,KW=5,s0=2,s1=1,s2=1,p0=2,p1=0,p2=1,d0=1,d1=1,d2=2,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_3D","N=1,IC=4,ID=8,IH=8,IW=8,OC=8,KD=1,KH=1,KW=1,s0=1,s1=1,s2=1,p0=0,p1=0,p2=0,d0=1,d1=1,d2=1,type_kernel=f16","support","0","no","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","CUDA"
+"CUDA0","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","CUDA"
+"CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","CUDA"
+"CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[2000,10,1,1]","support","1","yes","CUDA"
+"CUDA0","ARGMAX","type=f32,ne=[5438,3,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,2,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,2,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","0","no","CUDA"
+"CUDA0","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","0","no","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","yes","CUDA"
+"CUDA0","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","no","CUDA"
+"CUDA0","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","no","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=0","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=0","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=1","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=1","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=1","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=1","support","1","yes","CUDA"
+"CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=1","support","1","yes","CUDA"
+"CUDA0","DUP","type=f32,ne=[10,10,20,1]","support","1","yes","CUDA"
+"CUDA0","DUP","type=f16,ne=[10,10,20,1]","support","1","yes","CUDA"
+"CUDA0","DUP","type=i32,ne=[10,10,20,1]","support","0","no","CUDA"
+"CUDA0","DUP","type=i16,ne=[10,10,20,1]","support","0","no","CUDA"
+"CUDA0","DUP","type=f32,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","CUDA"
+"CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","CUDA"
+"CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","no","CUDA"
+"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","1","yes","CUDA"
+"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","1","yes","CUDA"
+"CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","1","yes","CUDA"
+"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","1","yes","CUDA"
+"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","1","yes","CUDA"
+"CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=mxfp4,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=mxfp4,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=mxfp4,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=i32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=i32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,3],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CUDA"
+"CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CUDA"
+"CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CUDA"
+"CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CUDA"
+"CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CUDA"
+"CUDA0","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","CUDA"
+"CUDA0","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","CUDA"
+"CUDA0","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","CUDA"
+"CUDA0","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","CUDA"
+"CUDA0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","CUDA"
+"CUDA0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","CUDA"
+"CUDA0","CONT","type=bf16,ne=[2,3,5,7]","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f16,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,65536,1],nr=[256,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","SUB","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","CUDA"
+"CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SCALE","type=f32,ne=[100,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","yes","CUDA"
+"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=0","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001,inplace=0","support","1","yes","CUDA"
+"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","yes","CUDA"
+"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100,inplace=0","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100,inplace=0","support","1","yes","CUDA"
+"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","yes","CUDA"
+"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000,inplace=0","support","1","yes","CUDA"
+"CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000,inplace=0","support","1","yes","CUDA"
+"CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","yes","CUDA"
+"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
+"CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","CUDA"
+"CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","CUDA"
+"CUDA0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","CUDA"
+"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","CUDA"
+"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=mxfp4,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[1536,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","0","no","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=64,n=77,k=77,bs=[12,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=576,n=512,k=576,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[1,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[1,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2112,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,3],nr=[4,1],per=[0,2,1,3],k_v=0,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,3],nr=[4,1],per=[0,1,2,3],k_v=2113,o=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=0,m=32,n=8192,k=64","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=50,n=200,k=64","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=2,n_used=2,b=1,m=32,n=8192,k=64","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=4,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=5,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=17,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","yes","CUDA"
+"CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","yes","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=mxfp4,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","no","CUDA"
+"CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","no","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=1,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=2,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=4,n_experts_used=4,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=1,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=2,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=32,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","CUDA"
+"CUDA0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","CUDA"
+"CUDA0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","CUDA"
+"CUDA0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
+"CUDA0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","COS","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
+"CUDA0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","CUDA"
+"CUDA0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SIN","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","COS","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
+"CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CUDA"
+"CUDA0","SQR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","SQRT","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","LOG","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","SIN","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","COS","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","CUDA"
+"CUDA0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","CUDA"
+"CUDA0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CUDA"
+"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CUDA"
+"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CUDA"
+"CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,sinks=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=1","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=0.000000","support","1","yes","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=1.000000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,2,3],scale=0.100000,max_bias=8.000000","support","0","no","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=0","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0,inplace=1","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","0","no","CUDA"
+"CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","CUDA"
+"CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","0","no","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[1024,1,1,1],order=1","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","1","yes","CUDA"
+"CUDA0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=nearest,flags=none","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=0","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=align_corners","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bicubic,flags=align_corners","support","1","yes","CUDA"
+"CUDA0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bicubic,flags=align_corners","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3],permute=0,slice=0","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,3,2,1]","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[11,5,6,3],permute=[0,1,3,2]","support","1","yes","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=0","support","0","no","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=0,slice=1","support","0","no","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[11,5,6,3],permute=1,slice=1","support","0","no","CUDA"
+"CUDA0","MEAN","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[33,1,1,1],permute=0,slice=0","support","1","yes","CUDA"
+"CUDA0","MEAN","type=f32,ne=[33,1,1,1]","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[33,1024,1,1]","support","1","yes","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[33,1024,1,1],permute=0,slice=0","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA"
+"CUDA0","SUM","type=f32,ne=[33,256,1,1],permute=[1,0,2,3]","support","0","no","CUDA"
+"CUDA0","SUM_ROWS","type=f32,ne=[33,256,1,1],permute=0,slice=0","support","1","yes","CUDA"
+"CUDA0","MEAN","type=f32,ne=[33,256,1,1]","support","1","yes","CUDA"
+"CUDA0","MEAN","type=f32,ne=[32769,1,1,1]","support","1","yes","CUDA"
+"CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","CUDA"
+"CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","CUDA"
+"CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","CUDA"
+"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","CUDA"
+"CUDA0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","CUDA"
+"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","CUDA"
+"CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","CUDA"
+"CUDA0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","CUDA"
+"CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","CUDA"
+"CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","CUDA"
+"CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CUDA"
+"CUDA0","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","CUDA"
+"CUDA0","XIELU","type=f32,ne=[10,5,4,3]","support","0","no","CUDA"
+"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","0","no","CUDA"
+"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","0","no","CUDA"
+"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","0","no","CUDA"
+"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","0","no","CUDA"
+"CUDA0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","0","no","CUDA"
+"CUDA0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","CUDA"
+"CUDA0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","CUDA"
+"CUDA0","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","CUDA"
+"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CUDA"
+"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CUDA"
+"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","CUDA"
+"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=96,hsv=96,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","yes","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,sinks=0,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=113,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","no","CUDA"
+"CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA"
+"CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","yes","CUDA"
+"CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
+"CUDA0","OPT_STEP_SGD","type=f32,ne=[10,5,4,3]","support","1","yes","CUDA"
index c1ed1a21c81c4109aa15420eacf64bb27d635fe6..605fcfcb9c29f7fe870c8f4b1288038a25d9d9f7 100644 (file)
@@ -475,6 +475,7 @@ extern "C" {
         GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
+        GGML_OP_CUMSUM,
         GGML_OP_MEAN,
         GGML_OP_ARGMAX,
         GGML_OP_COUNT_EQUAL,
@@ -530,6 +531,8 @@ extern "C" {
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
+        GGML_OP_TRI,
+        GGML_OP_FILL,
 
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
@@ -542,6 +545,7 @@ extern "C" {
         GGML_OP_RWKV_WKV6,
         GGML_OP_GATED_LINEAR_ATTN,
         GGML_OP_RWKV_WKV7,
+        GGML_OP_SOLVE_TRI,
 
         GGML_OP_UNARY,
 
@@ -576,6 +580,8 @@ extern "C" {
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
         GGML_UNARY_OP_EXP,
+        GGML_UNARY_OP_EXPM1,
+        GGML_UNARY_OP_SOFTPLUS,
         GGML_UNARY_OP_GELU_ERF,
         GGML_UNARY_OP_XIELU,
         GGML_UNARY_OP_FLOOR,
@@ -620,6 +626,13 @@ extern "C" {
         GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
+    enum ggml_tri_type {
+        GGML_TRI_TYPE_UPPER_DIAG = 0,
+        GGML_TRI_TYPE_UPPER      = 1,
+        GGML_TRI_TYPE_LOWER_DIAG = 2,
+        GGML_TRI_TYPE_LOWER      = 3
+    };
+
     struct ggml_init_params {
         // memory pool
         size_t mem_size;   // bytes
@@ -957,6 +970,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_expm1(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_expm1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_softplus(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_softplus_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_sin(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
@@ -983,6 +1012,10 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_cumsum(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a);
+
     // mean along rows
     GGML_API struct ggml_tensor * ggml_mean(
             struct ggml_context * ctx,
@@ -2187,6 +2220,23 @@ extern "C" {
             int                   shift2,
             int                   shift3);
 
+    // Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
+    // zeroes everywhere outside the masked area
+    GGML_API struct ggml_tensor * ggml_tri(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_tri_type    type);
+
+    // Fill tensor a with constant c
+    GGML_API struct ggml_tensor * ggml_fill(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 c);
+
+    GGML_API struct ggml_tensor * ggml_fill_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 c);
 
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
@@ -2356,6 +2406,27 @@ extern "C" {
             struct ggml_tensor  * b,
             struct ggml_tensor  * state);
 
+    /* Solves a specific equation of the form Ax=B, where A is a triangular matrix
+    *  without zeroes on the diagonal (i.e. invertible).
+    *  B can have any number of columns, but must have the same number of rows as A
+    *  If A is [n, n] and B is [n, m], then the result will be [n, m] as well
+    *  Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
+    *  where n > 100 sparingly, pre-chunk if necessary.
+    *
+    *  If left = false, solves xA=B instead
+    *  If lower = false, assumes upper triangular instead
+    *  If uni = true, assumes diagonal of A to be all ones (will override actual values)
+    *
+    *  TODO: currently only lower, right, non-unitriangular variant is implemented
+    */
+    GGML_API struct ggml_tensor * ggml_solve_tri(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        bool                  left,
+        bool                  lower,
+        bool                  uni);
+
     // custom operators
 
     typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
index d8e3c48c609fbe4aa9b1c6130f410eebb430bb63..c7348cc26c10ca451a4a7a2693f261ee6d4c1611 100644 (file)
@@ -1731,6 +1731,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_sum_rows(params, tensor);
             } break;
+        case GGML_OP_CUMSUM:
+            {
+                ggml_compute_forward_cumsum(params, tensor);
+            } break;
         case GGML_OP_MEAN:
             {
                 ggml_compute_forward_mean(params, tensor);
@@ -1927,6 +1931,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_leaky_relu(params, tensor);
             } break;
+        case GGML_OP_TRI:
+            {
+                ggml_compute_forward_tri(params, tensor);
+            } break;
+        case GGML_OP_FILL:
+            {
+                ggml_compute_forward_fill(params, tensor);
+            } break;
         case GGML_OP_FLASH_ATTN_EXT:
             {
                 ggml_compute_forward_flash_attn_ext(params, tensor);
@@ -1982,6 +1994,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_rwkv_wkv7(params, tensor);
             } break;
+        case GGML_OP_SOLVE_TRI:
+            {
+                ggml_compute_forward_solve_tri(params, tensor);
+            } break;
         case GGML_OP_MAP_CUSTOM1:
             {
                 ggml_compute_forward_map_custom1(params, tensor);
@@ -2140,6 +2156,9 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_ADD_ID:
         case GGML_OP_ADD1:
         case GGML_OP_ACC:
+        case GGML_OP_CUMSUM:
+        case GGML_OP_TRI:
+        case GGML_OP_FILL:
             {
                 n_tasks = n_threads;
             } break;
@@ -2157,6 +2176,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = 1;
             } break;
         case GGML_OP_COUNT_EQUAL:
+        case GGML_OP_SOLVE_TRI:
             {
                 n_tasks = n_threads;
             } break;
@@ -2179,6 +2199,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 case GGML_UNARY_OP_HARDSWISH:
                 case GGML_UNARY_OP_HARDSIGMOID:
                 case GGML_UNARY_OP_EXP:
+                case GGML_UNARY_OP_SOFTPLUS:
+                case GGML_UNARY_OP_EXPM1:
                 case GGML_UNARY_OP_FLOOR:
                 case GGML_UNARY_OP_CEIL:
                 case GGML_UNARY_OP_ROUND:
index 09f53b470b26af8b20715c5f6fb3a059c980cf5e..b6209588db1e44ae25dfba0e58ee70da8d201141 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <cfloat>
 #include <algorithm>
+#include <cmath>
 #include <functional>
 
 // ggml_compute_forward_dup
@@ -1395,6 +1396,56 @@ void ggml_compute_forward_sum(
     }
 }
 
+// ggml_compute_forward_cumsum
+
+static void ggml_compute_forward_cumsum_f32(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(float));
+    GGML_ASSERT(dst->nb[0] == sizeof(float));
+
+    GGML_TENSOR_UNARY_OP_LOCALS
+
+    GGML_ASSERT(ne0 == ne00);
+    GGML_ASSERT(ne1 == ne01);
+    GGML_ASSERT(ne2 == ne02);
+    GGML_ASSERT(ne3 == ne03);
+
+    const auto [ir0, ir1] = get_thread_range(params, src0);
+
+    for (int64_t ir = ir0; ir < ir1; ++ir) {
+        const int64_t i03 = ir/(ne02*ne01);
+        const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+        const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+        float * src_row = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
+        float * dst_row = (float *) ((char *) dst->data  + i01*nb1  + i02*nb2  + i03*nb3);
+
+        ggml_vec_cumsum_f32(ne00, dst_row, src_row);
+    }
+}
+
+void ggml_compute_forward_cumsum(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_cumsum_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_sum_rows
 
 static void ggml_compute_forward_sum_rows_f32(
@@ -2141,6 +2192,83 @@ static void ggml_compute_forward_gelu(
     }
 }
 
+// ggml_compute_fill
+
+static void ggml_compute_forward_fill_f32(const ggml_compute_params * params, ggml_tensor * dst) {
+    const float c = ggml_get_op_params_f32(dst, 0);
+
+    GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+    GGML_TENSOR_LOCALS(size_t,  nb, dst, nb);
+
+    const auto [ir0, ir1] = get_thread_range(params, dst);
+
+    for (int64_t ir = ir0; ir < ir1; ++ir) {
+        const int64_t i03 = ir/(ne2*ne1);
+        const int64_t i02 = (ir - i03*ne2*ne1)/ne1;
+        const int64_t i01 = (ir - i03*ne2*ne1 - i02*ne1);
+
+        float * dst_ptr  = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
+
+        ggml_vec_set_f32(ne0, dst_ptr, c);
+    }
+}
+
+void ggml_compute_forward_fill(const ggml_compute_params * params, ggml_tensor * dst) {
+    ggml_compute_forward_fill_f32(params, dst);
+}
+
+// ggml_compute_tri
+
+static void ggml_compute_forward_tri_f32(const ggml_compute_params * params, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+
+    const ggml_tri_type ttype = (ggml_tri_type) ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
+    GGML_TENSOR_UNARY_OP_LOCALS
+
+    const auto [ir0, ir1] = get_thread_range(params, src0);
+
+    bool (*bipred)(int, int);
+
+    switch (ttype) {
+        case GGML_TRI_TYPE_LOWER:      bipred = [](int i, int r) { return i <  r; }; break;
+        case GGML_TRI_TYPE_LOWER_DIAG: bipred = [](int i, int r) { return i <= r; }; break;
+        case GGML_TRI_TYPE_UPPER:      bipred = [](int i, int r) { return i >  r; }; break;
+        case GGML_TRI_TYPE_UPPER_DIAG: bipred = [](int i, int r) { return i >= r; }; break;
+        default: GGML_ABORT("invalid tri type");
+    }
+
+    for (int64_t ir = ir0; ir < ir1; ++ir) {
+        const int64_t i03 = ir/(ne02*ne01);
+        const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+        const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+        const float * src_ptr = (const float  *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+              float * dst_ptr = (      float  *) ((      char *) dst->data  + i03*nb3  + i02*nb2  + i01*nb1);
+
+        for (int i0 = 0; i0 < ne0; ++i0) {
+            dst_ptr[i0] = bipred(i0, i01) ? src_ptr[i0] : 0.0f;
+        }
+    }
+}
+
+void ggml_compute_forward_tri(const ggml_compute_params * params, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_tri_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_gelu_erf
 
 static void ggml_compute_forward_gelu_erf_f32(
@@ -8536,7 +8664,7 @@ static void ggml_compute_forward_ssm_scan_f32(
                 // n_head
                 for (int h = ih0; h < ih1; ++h) {
                     // ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
-                    const float dt_soft_plus = ggml_softplus(dt[h]);
+                    const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
                     const float dA = expf(dt_soft_plus * A[h]);
                     const int g = h / (nh / ng); // repeat_interleave
 
@@ -8633,7 +8761,7 @@ static void ggml_compute_forward_ssm_scan_f32(
                 // n_head
                 for (int h = ih0; h < ih1; ++h) {
                     // ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
-                    const float dt_soft_plus = ggml_softplus(dt[h]);
+                    const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
                     const int g = h / (nh / ng); // repeat_interleave
 
                     // dim
@@ -8916,6 +9044,14 @@ void ggml_compute_forward_unary(
             {
                 ggml_compute_forward_xielu(params, dst);
             } break;
+        case GGML_UNARY_OP_EXPM1:
+            {
+                ggml_compute_forward_expm1(params, dst);
+            } break;
+        case GGML_UNARY_OP_SOFTPLUS:
+            {
+                ggml_compute_forward_softplus(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -9512,6 +9648,76 @@ void ggml_compute_forward_gla(
     }
 }
 
+static void ggml_compute_forward_solve_tri_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
+    const struct ggml_tensor * src0 = dst->src[0];  // A (lower triangular)
+    const struct ggml_tensor * src1 = dst->src[1];  // B (RHS)
+
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type  == GGML_TYPE_F32);
+
+    GGML_ASSERT(ne00 == ne01); // A must be square
+    GGML_ASSERT(ne0  == ne10); // solution cols == B cols
+    GGML_ASSERT(ne1  == ne11); // solution rows == B rows
+
+    GGML_ASSERT(ne02 == ne12 && ne12 == ne2);
+    GGML_ASSERT(ne03 == ne13 && ne13 == ne3);
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t k = ne10;   // number of RHS columns
+    const int64_t n = ne11;   // A is n×n
+    const int64_t nr = ne02 * ne03 * k; // we're parallelizing on columns here, so seq x token x column will be the unit
+
+    // chunks per thread
+    const int64_t dr = (nr + nth - 1)/nth;
+
+    // chunk range for this thread
+    const int64_t ir0 = dr*ith;
+    const int64_t ir1 = MIN(ir0 + dr, nr);
+
+    const float * A = (const float *) src0->data;  // [n, n, B1, B2]
+    const float * B = (const float *) src1->data;  // [n, k, B1, B2]
+          float * X = (      float *) dst->data;   // [n, k, B1, B2]
+
+    for (int64_t ir = ir0; ir < ir1; ++ir) {
+        const int64_t i03 = ir/(ne02*k);
+        const int64_t i02 = (ir - i03*ne02*k)/k;
+        const int64_t i01 = (ir - i03*ne02*k - i02*k);
+
+        const float * A_batch = A + i02 * nb02 / sizeof(float) + i03 * nb03 / sizeof(float);
+        const float * B_batch = B + i02 * nb12 / sizeof(float) + i03 * nb13 / sizeof(float);
+
+        float * X_batch = X + i02 * nb2 / sizeof(float) + i03 * nb3 / sizeof(float);
+
+        for (int64_t i00 = 0; i00 < n; ++i00) {
+            float sum = 0.0f;
+            for (int64_t t = 0; t < i00; ++t) {
+                sum += A_batch[i00 * n + t] * X_batch[i01 * n + t];
+            }
+
+            const float diag = A_batch[i00 * n + i00];
+            GGML_ASSERT(diag != 0.0f && "Zero diagonal in triangular matrix");
+
+            X_batch[i01 * n + i00] = (B_batch[i00 * k + i01] - sum) / diag;
+        }
+    }
+}
+
+void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
+        ggml_compute_forward_solve_tri_f32(params, dst);
+    } else {
+        GGML_ABORT("fatal error");
+    }
+}
+
 // ggml_compute_forward_rwkv_wkv7
 
 static void ggml_compute_forward_rwkv_wkv7_f32(
index 2b4127c12b15e92de018adefc5a00f502a88dc0a..98a0eec16d9fec13c77dc0061b9d77e5d3be78b0 100644 (file)
@@ -34,6 +34,7 @@ void ggml_compute_forward_add1(const struct ggml_compute_params * params, struct
 void ggml_compute_forward_acc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_sum(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_sum_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_cumsum(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_mean(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_argmax(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_count_equal(const struct ggml_compute_params * params, struct ggml_tensor * dst);
@@ -81,6 +82,8 @@ void ggml_compute_forward_arange(const struct ggml_compute_params * params, stru
 void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_leaky_relu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_fill(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_flash_attn_ext(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_flash_attn_back(
         const struct ggml_compute_params * params,
@@ -96,6 +99,7 @@ void ggml_compute_forward_get_rel_pos(const struct ggml_compute_params * params,
 void ggml_compute_forward_add_rel_pos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_rwkv_wkv6(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_rwkv_wkv7(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_gla(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_map_custom1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_map_custom2(const struct ggml_compute_params * params, struct ggml_tensor * dst);
index a047537b34f78ffabb9cb62cb3bff1cc642f8036..1d9873ad0f230d1140e54c86f5a0b24919a81261 100644 (file)
@@ -73,6 +73,14 @@ static inline float op_log(float x) {
     return logf(x);
 }
 
+static inline float op_expm1(float x) {
+    return expf(x) - 1.0f;
+}
+
+static inline float op_softplus(float x) {
+    return (x > 20.0f) ? x : logf(1.0f + expf(x));
+}
+
 static inline float op_floor(float x) {
     return floorf(x);
 }
@@ -290,6 +298,14 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor *
     unary_op<op_log>(params, dst);
 }
 
+void ggml_compute_forward_expm1(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_expm1>(params, dst);
+}
+
+void ggml_compute_forward_softplus(const ggml_compute_params * params, ggml_tensor * dst) {
+    unary_op<op_softplus>(params, dst);
+}
+
 void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
     unary_op<op_floor>(params, dst);
 }
index fa45d9f0e636fc5890432bc73d27ed563c434186..bcad5a3af1a9879718c56ef8e4d9438c0797544f 100644 (file)
@@ -22,6 +22,8 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct
 void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_expm1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_softplus(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
index 65c7dfb6b9a4957d8ac11fb4d0343e0472c912aa..ac59f1fe85b25c94194d75b2ec2c2701e83057ec 100644 (file)
@@ -1416,6 +1416,16 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 #endif
 }
 
+inline static void ggml_vec_cumsum_f32(const int n, float * y, const float * x) {
+    for (int i = 0; i < n; ++i) {
+        if (i == 0) {
+            y[i] = x[i];
+        } else {
+            y[i] = y[i - 1] + x[i];
+        }
+    }
+}
+
 inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
     ggml_float sum = 0.0;
     for (int i = 0; i < n; ++i) {
index 41de87c099958efb2d42f1b789e6bd36410ba7fd..7d792e60cf9c5e32c74b521ec6c67184f646825e 100644 (file)
@@ -2527,6 +2527,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
                 case GGML_UNARY_OP_TRUNC:
                     ggml_cuda_op_trunc(ctx, dst);
                     break;
+                case GGML_UNARY_OP_EXPM1:
+                    ggml_cuda_op_expm1(ctx, dst);
+                    break;
+                case GGML_UNARY_OP_SOFTPLUS:
+                    ggml_cuda_op_softplus(ctx, dst);
+                    break;
                 default:
                     return false;
             }
@@ -3829,6 +3835,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
                 case GGML_UNARY_OP_GELU_QUICK:
                 case GGML_UNARY_OP_TANH:
                 case GGML_UNARY_OP_EXP:
+                case GGML_UNARY_OP_EXPM1:
+                case GGML_UNARY_OP_SOFTPLUS:
                 case GGML_UNARY_OP_ELU:
                 case GGML_UNARY_OP_FLOOR:
                 case GGML_UNARY_OP_CEIL:
index c1dc6ddbf8f81ce94e133d9c4307ee32bff30f05..d4866067a4f166ff2019b6cb6ddd44255b66ac93 100644 (file)
@@ -81,6 +81,14 @@ static __device__ __forceinline__ float op_log(float x) {
     return logf(x);
 }
 
+static __device__ __forceinline__ float op_expm1(float x) {
+    return expm1f(x);
+}
+
+static __device__ __forceinline__ float op_softplus(float x) {
+    return (x > 20.0f) ? x : logf(1.0f + expf(x));
+}
+
 static __device__ __forceinline__ float op_elu(float x) {
     return (x > 0.f) ? x : expm1f(x);
 }
@@ -233,6 +241,14 @@ void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
 void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_unary<op_trunc>(ctx, dst);
 }
+
+void ggml_cuda_op_expm1(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_expm1>(ctx, dst);
+}
+
+void ggml_cuda_op_softplus(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_softplus>(ctx, dst);
+}
 /* gated ops */
 
 template <float (*op)(float), typename T>
index 2800c75ba3f7abc50f6625724ef79059f3971f95..609046e56941eab99495196a29e81de148912ccf 100644 (file)
@@ -61,6 +61,10 @@ void ggml_cuda_op_cos(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
 void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
+void ggml_cuda_op_expm1(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_softplus(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
 void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
 void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
index ec37a25337b649a94d12fc456beef1cf17de4702..fe57d4c582b5f3292ebfd89fbd1b491879f722f4 100644 (file)
@@ -102,7 +102,7 @@ static bool ggml_op_is_empty(enum ggml_op op) {
     }
 }
 
-static inline float ggml_softplus(float input) {
+static inline float ggml_compute_softplus_f32(float input) {
     return (input > 20.0f) ? input : logf(1 + expf(input));
 }
 //
index 9be35c1be8456bf538ddb85e91be6c13fefb78df..a5846a23937ce73dae565d21452e6aa8ecf4ac4e 100644 (file)
@@ -935,6 +935,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "COS",
     "SUM",
     "SUM_ROWS",
+    "CUMSUM",
     "MEAN",
     "ARGMAX",
     "COUNT_EQUAL",
@@ -990,6 +991,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
+    "TRI",
+    "FILL",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -1002,6 +1005,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "RWKV_WKV6",
     "GATED_LINEAR_ATTN",
     "RWKV_WKV7",
+    "SOLVE_TRI",
 
     "UNARY",
 
@@ -1019,7 +1023,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
+static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1039,6 +1043,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cos(x)",
     "Σx",
     "Σx_k",
+    "cumsum(x)",
     "Σx/n",
     "argmax(x)",
     "count_equal(x)",
@@ -1094,6 +1099,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
+    "tri(x)",
+    "fill(x, c)",
 
     "flash_attn_ext(x)",
     "flash_attn_back(x)",
@@ -1106,6 +1113,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "rwkv_wkv6(k, v, r, tf, td, s)",
     "gated_linear_attn(k, v, q, gate, s)",
     "rwkv_wkv7(r, w, k, v, a, b, s)",
+    "A X = B, A triangular, solve X",
 
     "unary(x)",
 
@@ -1123,7 +1131,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
+static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -1142,6 +1150,8 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "HARDSWISH",
     "HARDSIGMOID",
     "EXP",
+    "EXPM1",
+    "SOFTPLUS",
     "GELU_ERF",
     "XIELU",
     "FLOOR",
@@ -1150,7 +1160,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "TRUNC",
 };
 
-static_assert(GGML_UNARY_OP_COUNT == 20, "GGML_UNARY_OP_COUNT != 20");
+static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
 
 static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
     "REGLU",
@@ -2258,6 +2268,30 @@ struct ggml_tensor * ggml_log_inplace(
     return ggml_log_impl(ctx, a, true);
 }
 
+struct ggml_tensor * ggml_expm1(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
+}
+
+struct ggml_tensor * ggml_expm1_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
+}
+
+struct ggml_tensor * ggml_softplus(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
+}
+
+struct ggml_tensor * ggml_softplus_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
+}
+
 // ggml_sin
 
 static struct ggml_tensor * ggml_sin_impl(
@@ -2341,6 +2375,21 @@ struct ggml_tensor * ggml_sum_rows(
     return result;
 }
 
+// ggml_cumsum
+
+struct ggml_tensor * ggml_cumsum(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    result->op     = GGML_OP_CUMSUM;
+    result->src[0] = a;
+
+    return result;
+}
+
 // ggml_mean
 
 struct ggml_tensor * ggml_mean(
@@ -2668,8 +2717,8 @@ struct ggml_tensor * ggml_xielu(
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
     ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
-    ggml_set_op_params_f32(result, 1, beta + ggml_softplus(alpha_n));
-    ggml_set_op_params_f32(result, 2, ggml_softplus(alpha_p));
+    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
+    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
     ggml_set_op_params_f32(result, 3, beta);
     ggml_set_op_params_f32(result, 4, eps);
 
@@ -5028,6 +5077,61 @@ struct ggml_tensor * ggml_timestep_embedding(
     return result;
 }
 
+// ggml_tri
+
+struct ggml_tensor * ggml_tri(
+    struct ggml_context * ctx,
+    struct ggml_tensor  * a,
+    enum ggml_tri_type    type) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+    GGML_ASSERT(ggml_is_contiguous(a));
+    GGML_ASSERT(a->ne[0] == a->ne[1]);
+
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    ggml_set_op_params_i32(result, 0, type);
+
+    result->op = GGML_OP_TRI;
+    result->src[0] = a;
+
+    return result;
+}
+
+// ggml_fill
+
+static struct ggml_tensor * ggml_fill_impl(
+    struct ggml_context * ctx,
+    struct ggml_tensor  * a,
+    float                 c,
+    bool                  inplace) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+    GGML_ASSERT(ggml_is_contiguous(a));
+
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_set_op_params_f32(result, 0, c);
+
+    result->op = GGML_OP_FILL;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_fill(
+    struct ggml_context * ctx,
+    struct ggml_tensor  * a,
+    float                 c) {
+    return ggml_fill_impl(ctx, a, c, false);
+}
+
+struct ggml_tensor * ggml_fill_inplace(
+    struct ggml_context * ctx,
+    struct ggml_tensor  * a,
+    float                 c) {
+    return ggml_fill_impl(ctx, a, c, true);
+}
+
 // ggml_argsort
 
 struct ggml_tensor * ggml_argsort(
@@ -5882,6 +5986,41 @@ struct ggml_tensor * ggml_opt_step_sgd(
     return result;
 }
 
+// solve_tri
+
+struct ggml_tensor * ggml_solve_tri(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        bool                  left,
+        bool                  lower,
+        bool                  uni) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+    GGML_ASSERT(b->type == GGML_TYPE_F32);
+
+    // A must be square and lower diagonal
+    GGML_ASSERT(a->ne[0] == a->ne[1]);
+    // B must have same outer dimension as A
+    GGML_ASSERT(a->ne[1] == b->ne[1]);
+
+    // batch dimensions must be equal
+    GGML_ASSERT(a->ne[2] == b->ne[2]);
+    GGML_ASSERT(a->ne[3] == b->ne[3]);
+
+    GGML_ASSERT(ggml_is_contiguous(a));
+    GGML_ASSERT(ggml_is_contiguous(b));
+
+    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
+
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
+
+    result->op     = GGML_OP_SOLVE_TRI;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 struct ggml_hash_set ggml_hash_set_new(size_t size) {
@@ -6454,6 +6593,16 @@ static void ggml_compute_backward(
                         ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
                     }
                 } break;
+                case GGML_UNARY_OP_EXPM1: {
+                    if (src0_needs_grads) {
+                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
+                    }
+                } break;
+                case GGML_UNARY_OP_SOFTPLUS: {
+                    if (src0_needs_grads) {
+                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
+                    }
+                } break;
                 default: {
                     fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
                         __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
index 38b7ddf22178531cd8f301963c41b0a535b611ad..b11793963aa8e86c535b99a492e69f192f26454e 100644 (file)
@@ -175,6 +175,38 @@ static void init_tensor_kq_mask(ggml_tensor * tensor, float min = -1.0f, float m
     ggml_backend_tensor_set(tensor, data_f16.data(), 0, data_f16.size()*sizeof(ggml_fp16_t));
 }
 
+// generate a lower triangular matrix
+static void init_tensor_tril(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
+    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
+    GGML_ASSERT(tensor->ne[0] == tensor->ne[1]);
+
+    GGML_TENSOR_LOCALS(int32_t, ne, tensor, ne);
+    GGML_TENSOR_LOCALS(size_t, nb, tensor, nb);
+
+    std::vector<float> data_f32(ne0*ne1*ne2*ne3);
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> dis(min, max);
+
+    for (int64_t i3 = 0; i3 < ne3; i3++) {
+        for (int64_t i2 = 0; i2 < ne2; i2++) {
+            for (int64_t i1 = 0; i1 < ne1; i1++) {
+                for (int64_t i0 = 0; i0 < ne0; i0++) {
+                    int64_t idx = (i0 * nb0 + i1 * nb1 + i2 * nb2 + i3 * nb3) / sizeof(float);
+                    if (i0 <= i1) {
+                        data_f32[idx] = dis(gen);
+                    } else {
+                        data_f32[idx] = 0.0f;
+                    }
+                }
+            }
+        }
+    }
+
+    ggml_backend_tensor_set(tensor, data_f32.data(), 0, ggml_nbytes(tensor));
+}
+
 static std::vector<float> tensor_to_float(const ggml_tensor * t) {
     std::vector<float> tv;
     tv.reserve(ggml_nelements(t));
@@ -1804,7 +1836,8 @@ struct test_unary : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG ||
-            op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU;
+            op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU ||
+            op == GGML_UNARY_OP_EXPM1 || op == GGML_UNARY_OP_SOFTPLUS;
 
         ggml_tensor * a;
         if (v & 1) {
@@ -2779,7 +2812,7 @@ struct test_bin_bcast : public test_case {
     const std::array<int, 4> nr;
     int nf; // number of fused ops, nf == 1 -> single op (no fusion)
 
-    bool run_whole_graph() override { return true; }
+    bool run_whole_graph() override { return nf > 1; }
 
     std::string vars() override {
         return VARS_TO_STR4(type, ne, nr, nf);
@@ -5395,6 +5428,7 @@ struct test_pad : public test_case {
     }
 };
 
+// GGML_OP_PAD (with extension)
 struct test_pad_ext : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne_a;
@@ -5802,6 +5836,7 @@ struct test_opt_step_adamw : public test_case {
     }
 };
 
+// GGML_OP_OPT_STEP_SGD
 struct test_opt_step_sgd : public test_case {
     const ggml_type              type;
     const std::array<int64_t, 4> ne;
@@ -5841,6 +5876,170 @@ struct test_opt_step_sgd : public test_case {
     }
 };
 
+// GGML_OP_CUMSUM
+struct test_cumsum : public test_case {
+    const ggml_type              type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override { return VARS_TO_STR2(type, ne); }
+
+    test_cumsum(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = { 10, 5, 4, 3 })
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_cumsum(ctx, a);
+
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -1.0f, 1.0f);
+        }
+    }
+};
+
+// GGML_OP_XIELU
+struct test_xielu : public test_case {
+    const ggml_type              type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override { return VARS_TO_STR2(type, ne); }
+
+    test_xielu(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = { 10, 5, 4, 3 })
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        float alpha_n = 4.0f;
+        float alpha_p = 20.0f;
+        float beta = 0.5f;
+        float eps = 0.0000001f;
+
+        ggml_tensor * out = ggml_xielu(ctx, a, alpha_n, alpha_p, beta, eps);
+
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -1.0f, 1.0f);
+        }
+    }
+};
+
+// GGML_OP_TRI
+struct test_tri : public test_case {
+    const ggml_type              type;
+    const std::array<int64_t, 4> ne;
+    const ggml_tri_type          tri_type;
+
+    std::string vars() override { return VARS_TO_STR3(type, ne, tri_type); }
+
+    test_tri(ggml_tri_type tri_type, ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = { 10, 10, 4, 3 })
+        : type(type), ne(ne), tri_type(tri_type) {
+            GGML_ASSERT(ne[0] == ne[1]);
+        }
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_tri(ctx, a, tri_type);
+
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -1.0f, 1.0f);
+        }
+    }
+};
+
+// GGML_OP_FILL
+struct test_fill : public test_case {
+    const ggml_type              type;
+    const std::array<int64_t, 4> ne;
+    float                        c;
+
+    std::string vars() override { return VARS_TO_STR3(type, ne, c); }
+
+    test_fill(float c, ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = { 10, 10, 4, 3 })
+        : type(type), ne(ne), c(c) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_fill(ctx, a, c);
+
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+};
+
+// GGML_OP_SOLVE_TRI
+struct test_solve_tri : public test_case {
+    const ggml_type              type;
+    const std::array<int64_t, 4> ne_lhs;
+    const std::array<int64_t, 4> ne_rhs;
+
+    std::string vars() override { return VARS_TO_STR3(type, ne_lhs, ne_rhs); }
+
+    test_solve_tri(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne_lhs = { 10, 10, 4, 3 },
+            std::array<int64_t, 4> ne_rhs = { 3, 10, 4, 3 }
+        )
+        : type(type), ne_lhs(ne_lhs), ne_rhs(ne_rhs) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne_lhs[0], ne_lhs[1], ne_lhs[2], ne_lhs[3]);
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * b = ggml_new_tensor_4d(ctx, type, ne_rhs[0], ne_rhs[1], ne_rhs[2], ne_rhs[3]);
+        ggml_set_param(b);
+        ggml_set_name(b, "b");
+
+        ggml_tensor * out = ggml_solve_tri(ctx, a, b, true, true, false);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (strcmp(t->name, "a") == 0) {
+                // note: avoid zeros in the diagonal
+                init_tensor_tril(t, 0.1, 1.0f);
+            } else {
+                init_tensor_uniform(t, -1.0f, 1.0f);
+            }
+        }
+    }
+};
+
 enum llm_norm_type {
     LLM_NORM,
     LLM_NORM_RMS,
@@ -6282,6 +6481,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
         for (int v : {0, 1}) {
             for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) {
+                if (op == GGML_UNARY_OP_XIELU) {
+                    continue; // need extra params, separate test
+                }
                 test_cases.emplace_back(new test_unary((ggml_unary_op) op, type, { 128, 2, 2, 2 }, v));
                 test_cases.emplace_back(new test_unary((ggml_unary_op) op, type, { 5, 7, 11, 13 }, v));
             }
@@ -7339,6 +7541,26 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_arange());
     test_cases.emplace_back(new test_timestep_embedding());
     test_cases.emplace_back(new test_leaky_relu());
+    test_cases.emplace_back(new test_cumsum());
+
+    test_cases.emplace_back(new test_xielu());
+
+    test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER));
+    test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER_DIAG));
+    test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER));
+    test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG));
+
+    test_cases.emplace_back(new test_fill(0.0f));
+    test_cases.emplace_back(new test_fill(2.0f, GGML_TYPE_F32, { 303, 207, 11, 3 }));
+    test_cases.emplace_back(new test_fill(-152.0f, GGML_TYPE_F32, { 800, 600, 4, 4 }));
+
+    test_cases.emplace_back(new test_solve_tri());
+    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 11, 11, 1, 1 }, { 5, 11, 1, 1 }));
+    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 17, 17, 2, 4 }, { 9, 17, 2, 4 }));
+    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
+    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
+    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
+    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
 
     for (bool v : {false, true}) {
         test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {512, 512, 1, 1}, 0, 1, 0, 1, 0, 0, 0, 0, v));