git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
tests: reduce number of FA test permutations (llama/19381)
author Jeff Bolz <redacted>
Fri, 6 Feb 2026 14:50:30 +0000 (08:50 -0600)
committer Georgi Gerganov <redacted>
Sat, 7 Feb 2026 08:37:38 +0000 (10:37 +0200)
Only test non-F16 K/V types for head sizes 64 and 72 (one a multiple of QK, one not).

tests/test-backend-ops.cpp

index fbe23037cc9418d39c6e20c0536bc1982c2fa741..6fe1780f3bafae4d7f0915873b4909bfa0af4d02 100644 (file)
@@ -8231,6 +8231,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                                                 for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) {
                                                     if (hsk != 128 && prec == GGML_PREC_DEFAULT) continue;
                                                     for (ggml_type type_KV : {GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
+                                                        if (type_KV != GGML_TYPE_F16 && hsk != 64 && hsk != 72) continue;
                                                         test_cases.emplace_back(new test_flash_attn_ext(
                                                                     hsk, hsv, nh, {nr2, nr3}, kv, nb, mask, sinks, max_bias, logit_softcap, prec, type_KV));
                                                         // run fewer test cases permuted