UNUSED(blocklen);
#if defined(__ARM_FEATURE_SVE)
- if (svcntw() == 8) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
+ if (ggml_sve_cnt_b == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
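
The substitution in this hunk (and the next) is equality-preserving: svcntw() returns the number of 32-bit words per SVE vector, so svcntw() == 8 picks out 256-bit (32-byte) vectors, and QK8_0 is 32 in ggml. A minimal sanity check of that arithmetic, compilable on its own (the function name is ours, not from the patch):

    #include <arm_sve.h>
    #include <assert.h>

    #define QK8_0 32  // ggml's q8_0 block size in bytes

    // On SVE hardware, the old word-count test and the new byte-count test
    // select the same vector length: 8 words * 4 bytes = 32 bytes.
    static void check_sve_width_equivalence(void) {
        assert((svcntw() == 8) == (svcntb() == QK8_0));
    }
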
UNUSED(blocklen);
#if defined(__ARM_FEATURE_SVE)
- if (svcntw() == 8) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
+ if (ggml_sve_cnt_b == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
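
This excerpt never shows where ggml_sve_cnt_b is defined or filled in. A minimal sketch of one plausible setup, assuming the global simply caches the SVE vector length in bytes once, so the hot kernels compare a plain integer instead of issuing a vector-length query on every call (the init function is hypothetical):

    #include <arm_sve.h>

    int ggml_sve_cnt_b = 0;  // SVE vector length in bytes; 0 until initialized

    // Hypothetical one-time init hook: after this runs, every kernel can
    // test `ggml_sve_cnt_b == QK8_0` without touching an SVE intrinsic.
    static void ggml_init_sve_cnt_b(void) {
        if (ggml_sve_cnt_b == 0) {
            ggml_sve_cnt_b = (int) svcntb();
        }
    }
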
UNUSED(blocklen);
#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
- if (svcntw() == 8) {
+ if (ggml_sve_cnt_b == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
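        // ... (SVE kernel body elided in this excerpt) ...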
return;
}
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
- GGML_ASSERT((ggml_cpu_has_sve() && (svcntw() == 8)) &&
+ GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
"performance");
}
else if (ggml_cpu_has_neon()) {
- GGML_ASSERT(((ggml_cpu_has_sve() && (svcntw() == 8)) || ggml_cpu_has_matmul_int8()) &&
+ GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
"quantization format for optimal performance");
}
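
The `&& ! ((defined(_MSC_VER)) && ! defined(__clang__))` clause on the hunk above admits gcc, clang, and clang-cl but excludes plain MSVC. The truth table below follows directly from the macros; the rationale comment is our assumption (MSVC's ARM64 toolchain not building this SVE path), not something the patch states:

    // Guard behavior, assuming plain MSVC cannot compile these SVE kernels:
    //   gcc / clang : _MSC_VER undefined                  -> SVE path compiled
    //   clang-cl    : _MSC_VER and __clang__ both defined -> SVE path compiled
    //   MSVC        : _MSC_VER defined, __clang__ not     -> SVE path skipped
    #if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
    // 256-bit SVE fast path goes here
    #endif
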
UNUSED(blocklen);
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
- if (svcntw() == 8) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
+ if (ggml_sve_cnt_b == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
UNUSED(blocklen);
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
- if (svcntw() == 8) {
- GGML_ASSERT(!(ggml_cpu_has_sve() && (svcntw() == 8)) &&
+ if (ggml_sve_cnt_b == QK8_0) {
+ GGML_ASSERT(!(ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
"__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
}
#endif
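
The GGML_ASSERT lines in these hunks all use the `!(cond) && "message"` idiom: the string literal is non-null and therefore always truthy, so it never changes the result, but it shows up in the failure diagnostic when cond holds and the assert fires. A standalone illustration with plain assert (the variable is ours):

    #include <assert.h>

    int main(void) {
        int wrong_format_for_this_cpu = 1;  // stand-in for the SVE-256 check
        // Fires, and the printed expression carries the advice string:
        assert(!wrong_format_for_this_cpu &&
               "use the Q4_0_8_8 quantization format for optimal performance");
        return 0;
    }
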
UNUSED(blocklen);
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
- if (svcntw() == 8) {
+ if (ggml_sve_cnt_b == QK8_0) {
const void * b_ptr = vx;
const void * a_ptr = vy;
float * res_ptr = s;
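        // ... (SVE kernel body elided in this excerpt) ...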
return;
}
else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
- GGML_ASSERT((ggml_cpu_has_sve() && (svcntw() == 8)) &&
+ GGML_ASSERT((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) &&
"__ARM_FEATURE_SVE for vector size of 256-bits not defined, use the Q4_0_4_8 quantization format for optimal "
"performance");
}
else if (ggml_cpu_has_neon()) {
- GGML_ASSERT(((ggml_cpu_has_sve() && (svcntw() == 8)) || ggml_cpu_has_matmul_int8()) &&
+ GGML_ASSERT(((ggml_cpu_has_sve() && (ggml_sve_cnt_b == QK8_0)) || ggml_cpu_has_matmul_int8()) &&
"__ARM_FEATURE_SVE for vector size of 256-bits and __ARM_FEATURE_MATMUL_INT8 not defined, use the Q4_0_4_4 "
"quantization format for optimal performance");
}
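
Read together, the assert messages across these kernels encode one repacking rule per CPU feature set. A hypothetical helper condensing that advice (the function is ours; ggml_cpu_has_sve, ggml_cpu_has_neon, and ggml_cpu_has_matmul_int8 are the feature probes already used above):

    // Which 4-bit block layout the asserts above steer each machine toward.
    static const char * preferred_q4_0_repack(void) {
        if (ggml_cpu_has_sve() && ggml_sve_cnt_b == QK8_0)     return "Q4_0_8_8";
        if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) return "Q4_0_4_8";
        if (ggml_cpu_has_neon())                               return "Q4_0_4_4";
        return "Q4_0";  // plain fallback
    }
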
float sumf = 0;
#if defined(__ARM_FEATURE_SVE)
- if (svcntb() == QK8_0) {
+ if (ggml_sve_cnt_b == QK8_0) {
const svbool_t ptrueh = svptrue_pat_b8(SV_VL16);
const svbool_t ptruel = svnot_b_z(svptrue_b8(), ptrueh);
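
Unlike the earlier hunks, the old code here already compared svcntb() == QK8_0, so this change only swaps a per-call intrinsic for the cached global. The runtime gate itself matters because the two predicates hard-code a 16/16 byte-lane split that is only a clean halving on a 32-byte vector; our reading, in comment form:

    // With ggml_sve_cnt_b == QK8_0 (32-byte vectors):
    //   ptrueh = svptrue_pat_b8(SV_VL16)         -> byte lanes 0..15 active
    //   ptruel = svnot_b_z(svptrue_b8(), ptrueh) -> byte lanes 16..31 active
    // On a 512-bit machine, ptruel would cover lanes 16..63 (48 lanes, not 16),
    // breaking the halving, which is why the kernel is gated at runtime.
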
float sumf = 0;
#if defined(__ARM_FEATURE_SVE)
- if (svcntb() == QK8_0) {
+ if (ggml_sve_cnt_b == QK8_0) {
svfloat32_t sumv0 = svdup_n_f32(0.0f);
svfloat32_t sumv1 = svdup_n_f32(0.0f);
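
The q8_0 hunk ends mid-function. The two accumulators are kept separate so consecutive iterations can issue independently; after the loop, SVE code of this shape would typically fold and reduce them into the scalar sumf along these lines (a sketch, not part of this excerpt):

    // Combine both partial sums, then reduce across lanes to a scalar.
    sumf = svaddv_f32(svptrue_b32(),
                      svadd_f32_x(svptrue_b32(), sumv0, sumv1));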