foreach (feat ${ARGN})
set(GGML_INTERNAL_${feat} ON)
endforeach()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+ foreach (feat ${ARGN})
+ set(GGML_INTERNAL_${feat} ON)
+ endforeach()
endif()
ggml_add_cpu_backend_variant_impl(${tag_name})
else()
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
endif()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ ggml_add_cpu_backend_variant(power0)
+ ggml_add_cpu_backend_variant(power7_1 POWER7)
+ ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
+ ggml_add_cpu_backend_variant(power8_1 POWER8)
+ ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
+ ggml_add_cpu_backend_variant(power9 POWER9 VSX)
+ ggml_add_cpu_backend_variant(power10 POWER10 VSX)
+ ggml_add_cpu_backend_variant(power11 POWER11 VSX)
+ else()
+ message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
+ endif()
else()
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
endif()
else()
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
endif()
+ elseif(GGML_CPU_ALL_VARIANTS)
+ # Begin with the lowest baseline
+ set(ARCH_DEFINITIONS "")
+
+ # For every POWER level this variant requests, add the matching
+ # GGML_USE_POWER<N> definition; the loop ascends, so the MCPU ends up
+ # at the highest requested level
+ foreach(PVER RANGE 7 11)
+ if(DEFINED GGML_INTERNAL_POWER${PVER})
+ set(POWERPC_MCPU "power${PVER}")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
+ endif()
+ endforeach()
+ if (GGML_INTERNAL_VSX)
+ list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
+ list(APPEND ARCH_FLAGS -mvsx)
+ endif()
+
+ if (DEFINED POWERPC_MCPU)
+ list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
+ endif()
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
else()
if (GGML_CPU_POWERPC_CPUTYPE)
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
--- /dev/null
+# include "ggml-backend-impl.h"
+
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
+
+#if defined(__linux__)
+#include <sys/auxv.h>
+#endif
+
+#include <cctype>
+#include <string>
+
+// Snapshot of the host POWER CPU, taken once at construction.
+//
+// On Linux the fields are filled from the ELF auxiliary vector: AT_PLATFORM
+// supplies the platform string and, when the C library exposes
+// PPC_FEATURE_HAS_VSX, AT_HWCAP supplies the VSX capability bit. On any
+// other system every field keeps its default value.
+struct powerpc_features {
+    std::string platform = "";  // raw AT_PLATFORM string; empty if unavailable
+    int power_version = -1;     // N from "power<N>..."; -1 if it cannot be determined
+
+    bool has_vsx = false;       // true when VSX is reported or implied by the version
+
+    powerpc_features() {
+#if defined(__linux__)
+        unsigned long auxval = getauxval(AT_PLATFORM);
+        if (auxval) {
+            platform = std::string(reinterpret_cast<const char*>(auxval));
+            // TBD: Do systems exist that return this in uppercase?
+            if (platform.compare(0, 5, "power") == 0) {
+                // Extract the digits that directly follow the "power" prefix.
+                // Reading from the front instead of the back means a trailing
+                // marker (e.g. "power7+") no longer hides the version.
+                std::string::size_type vend = 5;
+                while (vend < platform.length() &&
+                       // isdigit requires a value representable as unsigned char
+                       std::isdigit(static_cast<unsigned char>(platform[vend]))) {
+                    vend++;
+                }
+                if (vend > 5) {
+                    power_version = std::stoi(platform.substr(5, vend - 5));
+                }
+            }
+        }
+#if defined(PPC_FEATURE_HAS_VSX)
+        // Prefer the capability bit over guessing from the version: build
+        // variants may enable VSX for targets older than POWER9, and those
+        // could never be selected if only the version check below were used.
+        if (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX) {
+            has_vsx = true;
+        }
+#endif
+#endif
+        // Version-based fallback, kept so results never regress when the
+        // capability bit is unavailable.
+        if (power_version >= 9) {
+            has_vsx = true;
+        }
+    }
+};
+
+// Compute the score of this CPU backend build for the running machine.
+//
+// Every GGML_USE_* requirement compiled into the build is checked against
+// the detected host features. A missing requirement yields 0 immediately;
+// otherwise the result is 1 plus a distinct power-of-two weight for each
+// requirement that was compiled in.
+// NOTE(review): presumably the caller treats 0 as "not usable" - confirm.
+static int ggml_backend_cpu_powerpc_score() {
+    powerpc_features host;
+    int total = 1;
+
+    // POWER generation requirements
+#if defined(GGML_USE_POWER7)
+    if (host.power_version < 7) {
+        return 0;
+    }
+    total += 2;
+#endif
+#if defined(GGML_USE_POWER8)
+    if (host.power_version < 8) {
+        return 0;
+    }
+    total += 4;
+#endif
+#if defined(GGML_USE_POWER9)
+    if (host.power_version < 9) {
+        return 0;
+    }
+    total += 8;
+#endif
+#if defined(GGML_USE_POWER10)
+    if (host.power_version < 10) {
+        return 0;
+    }
+    total += 16;
+#endif
+#if defined(GGML_USE_POWER11)
+    if (host.power_version < 11) {
+        return 0;
+    }
+    total += 32;
+#endif
+
+    // Instruction-set feature requirements
+#if defined(GGML_USE_VSX)
+    if (!host.has_vsx) {
+        return 0;
+    }
+    total += 64;
+#endif
+
+    return total;
+}
+
+GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
+
+#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
}
};
-// instance for Q4
-static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
-static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
-static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
-static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
-
-// instance for IQ4
-static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
-
} // namespace ggml::cpu::repack
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
+
+ // instance for Q4
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
+ static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
+
+ // instance for IQ4
+ static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
+
if (cur->type == GGML_TYPE_Q4_0) {
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
if (cur->ne[1] % 8 == 0) {
- return &ggml::cpu::repack::q4_0_8x8_q8_0;
+ return &q4_0_8x8_q8_0;
}
}
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
if (cur->ne[1] % 4 == 0) {
- return &ggml::cpu::repack::q4_0_4x8_q8_0;
+ return &q4_0_4x8_q8_0;
}
}
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
if (cur->ne[1] % 4 == 0) {
- return &ggml::cpu::repack::q4_0_4x4_q8_0;
+ return &q4_0_4x4_q8_0;
}
}
} else if (cur->type == GGML_TYPE_Q4_K) {
if (ggml_cpu_has_avx2()) {
if (cur->ne[1] % 8 == 0) {
- return &ggml::cpu::repack::q4_K_8x8_q8_K;
+ return &q4_K_8x8_q8_K;
}
}
} else if (cur->type == GGML_TYPE_IQ4_NL) {
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
if (cur->ne[1] % 4 == 0) {
- return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
+ return &iq4_nl_4x4_q8_0;
}
}
}