From: Aman Gupta
Date: Sat, 10 Jan 2026 17:12:57 +0000 (+0800)
Subject: test-backend-ops: fix mxfp4 tests on blackwell (#18736)
X-Git-Tag: upstream/0.0.7721~22
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b1377188784f9aea26b8abde56d4aee8c733eec7;p=pkg%2Fggml%2Fsources%2Fllama.cpp

test-backend-ops: fix mxfp4 tests on blackwell (#18736)
---

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 15567abe..56d277e1 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -454,6 +454,28 @@ static bool ggml_is_view_op(enum ggml_op op) {
     return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
 }
 
+static bool backend_has_feature(ggml_backend_t backend, const char * feature_name) {
+    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
+    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
+
+    auto get_features = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
+    if (!get_features) {
+        return false;
+    }
+
+    const ggml_backend_feature * features = get_features(reg);
+    if (!features) {
+        return false;
+    }
+
+    for (const ggml_backend_feature * f = features; f->name; ++f) {
+        if (strcmp(f->name, feature_name) == 0 && strcmp(f->value, "1") == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
 enum test_mode {
     MODE_TEST,
     MODE_PERF,
@@ -1101,6 +1123,11 @@ struct test_case {
         return 1e-7;
     }
 
+    virtual double max_nmse_err(ggml_backend_t backend) {
+        GGML_UNUSED(backend);
+        return max_nmse_err();
+    }
+
     virtual double max_maa_err() {
         return 1e-4;
     }
@@ -1109,6 +1136,10 @@ struct test_case {
         return max_nmse_err();
     }
 
+    virtual double max_err(ggml_backend_t backend) {
+        return max_nmse_err(backend);
+    }
+
     virtual double err(const float * a, const float * b, size_t n) {
         return nmse(a, b, n);
     }
@@ -1378,8 +1409,8 @@ struct test_case {
             }
 
             double err = ud->tc->err(f1.data(), f2.data(), f1.size());
-            if (err > ud->tc->max_err()) {
-                printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err());
+            if (err > ud->tc->max_err(ud->backend1)) {
+                printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err(ud->backend1));
                 //for (int i = 0; i < (int) f1.size(); i++) {
                 //    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
                 //}
@@ -3686,6 +3717,14 @@ struct test_mul_mat : public test_case {
         return 5e-4;
     }
 
+    double max_nmse_err(ggml_backend_t backend) override {
+        // for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
+        if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
+            return 2e-2;
+        }
+        return max_nmse_err();
+    }
+
     int64_t grad_nmax() override {
         return 20000;
     }
@@ -3814,6 +3853,14 @@ struct test_mul_mat_id : public test_case {
         return 5e-4;
     }
 
+    double max_nmse_err(ggml_backend_t backend) override {
+        // for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
+        if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
+            return 2e-2;
+        }
+        return max_nmse_err();
+    }
+
     uint64_t op_flops(ggml_tensor * t) override {
         GGML_UNUSED(t);
         return 2 * m * k * n * n_used;