# Register one CPU backend variant.
#
#   tag_name  suffix for the variant; forwarded to ggml_add_cpu_backend_variant_impl()
#   ARGN      feature names to enable for this variant
#               x86: sets GGML_<feat>          (e.g. AVX2, AVX512)
#               ARM: sets GGML_INTERNAL_<feat> (e.g. DOTPROD, SVE)
#
# All known feature switches are first forced OFF so that a value inherited from the
# enclosing scope or the cache cannot leak into a variant that did not request it.
# The switches are function-local and are read by ggml_add_cpu_backend_variant_impl().
function(ggml_add_cpu_backend_variant tag_name)
set(GGML_CPU_TAG_NAME ${tag_name})
# other: OPENMP LLAMAFILE CPU_HBM
- foreach (feat NATIVE
- SSE42
- AVX AVX2 BMI2 AVX_VNNI FMA F16C
- AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
- AMX_TILE AMX_INT8 AMX_BF16)
- set(GGML_${feat} OFF)
- endforeach()
-
- foreach (feat ${ARGN})
- set(GGML_${feat} ON)
- endforeach()
+    if (GGML_SYSTEM_ARCH STREQUAL "x86")
+        foreach (feat NATIVE
+                      SSE42
+                      AVX AVX2 BMI2 AVX_VNNI FMA F16C
+                      AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
+                      AMX_TILE AMX_INT8 AMX_BF16)
+            set(GGML_${feat} OFF)
+        endforeach()
+
+        foreach (feat ${ARGN})
+            set(GGML_${feat} ON)
+        endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "ARM")
+        # Same reset-then-enable scheme as x86: start every variant from a clean slate
+        foreach (feat DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
+            set(GGML_INTERNAL_${feat} OFF)
+        endforeach()
+
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
+    endif()
ggml_add_cpu_backend_variant_impl(${tag_name})
endfunction()
# Variant selection: with GGML_CPU_ALL_VARIANTS one backend is registered per feature
# combination; otherwise (GGML_CPU) a single untagged CPU backend is built.
# NOTE(review): this is a stitched diff excerpt -- lines between hunks are not shown.
if (GGML_CPU_ALL_VARIANTS)
if (NOT GGML_BACKEND_DL)
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+ elseif (GGML_CPU_ARM_ARCH) # a single user-supplied -march cannot be combined with per-variant flags
+ message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS")
endif()
if (GGML_SYSTEM_ARCH STREQUAL "x86")
ggml_add_cpu_backend_variant(x64)
# MSVC doesn't support AMX
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
endif()
+ elseif(GGML_SYSTEM_ARCH STREQUAL "ARM" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
+ # Many of these features are optional, so we build variants for popular
+ # combinations and name each backend after the architecture version the
+ # combination was first released with
+ ggml_add_cpu_backend_variant(armv8.0_1) # baseline: no optional features requested
+ ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
+ ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
+ ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
+ ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
+ ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
+ ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME) # SME without SVE2
+ ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
else()
- message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported on ${GGML_SYSTEM_ARCH}")
+ message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
endif()
elseif (GGML_CPU)
ggml_add_cpu_backend_variant_impl("")
+# Attach the runtime CPU feature-detection code to the backend target `cpu_name`.
+#
+#   cpu_name  backend target that receives the detection object files
+#   arch      directory under ggml-cpu/arch/ whose cpu-feats.cpp is compiled
+#   ARGN      compile definitions describing the features the variant was built with
+#
+# The detection code gets its own OBJECT library so that it can be built without the
+# architecture flags. Several CPU backend variants may coexist in one build, which
+# rules out set_source_files_properties() as a way to set the arch flags per file.
+function(ggml_add_cpu_backend_features cpu_name arch)
+    set(feats_target ${cpu_name}-feats)
+
+    add_library(${feats_target} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
+    set_property(TARGET ${feats_target} PROPERTY POSITION_INDEPENDENT_CODE ON)
+    target_include_directories(${feats_target} PRIVATE . .. ../include)
+    target_compile_definitions(${feats_target}
+        PRIVATE
+            ${ARGN}
+            GGML_BACKEND_DL
+            GGML_BACKEND_BUILD
+            GGML_BACKEND_SHARED)
+
+    target_link_libraries(${cpu_name} PRIVATE ${feats_target})
+endfunction()
+
# Builds one CPU backend; a non-empty `tag_name` yields the target name ggml-cpu-<tag_name>.
# On ARM the compiler -march flag comes either from the user (GGML_CPU_ARM_ARCH) or, with
# GGML_CPU_ALL_VARIANTS on Linux, from the GGML_INTERNAL_<feat> switches that
# ggml_add_cpu_backend_variant() sets for the variant.
# NOTE(review): this is a stitched diff excerpt -- lines between hunks are not shown.
function(ggml_add_cpu_backend_variant_impl tag_name)
if (tag_name)
set(GGML_CPU_NAME ggml-cpu-${tag_name})
else()
if (GGML_CPU_ARM_ARCH)
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
+ elseif(GGML_CPU_ALL_VARIANTS)
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ # Start from the lowest baseline architecture
+ set(ARM_MCPU "armv8-a") # despite the name, this value is passed to -march below
+ set(ARCH_TAGS "") # accumulates "+<extension>" suffixes for -march
+ set(ARCH_DEFINITIONS "") # accumulates GGML_USE_<feat> defines for the feature-detection code
+
+ # When a feature is selected, bump the architecture to the first
+ # version that supported it (checks run in ascending version order)
+ if (GGML_INTERNAL_DOTPROD)
+ set(ARM_MCPU "armv8.2-a")
+ set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
+ endif()
+ if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
+ set(ARM_MCPU "armv8.2-a")
+ set(ARCH_TAGS "${ARCH_TAGS}+fp16")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
+ endif()
+ if (GGML_INTERNAL_SVE)
+ set(ARM_MCPU "armv8.2-a")
+ set(ARCH_TAGS "${ARCH_TAGS}+sve")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
+ endif()
+ if (GGML_INTERNAL_MATMUL_INT8)
+ set(ARM_MCPU "armv8.6-a")
+ set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
+ endif()
+ if (GGML_INTERNAL_SVE2)
+ set(ARM_MCPU "armv8.6-a")
+ set(ARCH_TAGS "${ARCH_TAGS}+sve2")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
+ endif()
+ if (GGML_INTERNAL_SME)
+ set(ARM_MCPU "armv9.2-a")
+ set(ARCH_TAGS "${ARCH_TAGS}+sme")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
+ endif()
+
+ list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}") # e.g. -march=armv8.2-a+dotprod+fp16
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS}) # compiles ggml-cpu/arch/arm/cpu-feats.cpp
+ endif()
endif()
endif()
# The feature check relies on ARCH_DEFINITIONS, which is not set with GGML_NATIVE
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
endif()
-
- # The feature detection code is compiled as a separate target so that
- # it can be built without the architecture flags
- # Since multiple variants of the CPU backend may be included in the same
- # build, using set_source_files_properties() to set the arch flags is not possible
- set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
- add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
- target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
- target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
- target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
- set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS}) # shared helper; compiles ggml-cpu/arch/x86/cpu-feats.cpp
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
message(STATUS "PowerPC detected")
--- /dev/null
+#include "ggml-backend-impl.h"
+
+#if defined(__aarch64__)
+
+#if defined(__linux__)
+#include <sys/auxv.h>
+#elif defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+
+// Fallback values for AT_HWCAP2 bits that older kernel/libc headers do not define
+#if !defined(HWCAP2_SVE2)
+#define HWCAP2_SVE2 (1 << 1)
+#endif
+
+#if !defined(HWCAP2_I8MM)
+#define HWCAP2_I8MM (1 << 13)
+#endif
+
+#if !defined(HWCAP2_SME)
+#define HWCAP2_SME (1 << 23)
+#endif
+
+// Optional AArch64 CPU features as reported by the operating system.
+// Every flag stays false when the feature is absent, when the query fails,
+// or when the platform is neither Linux nor Apple.
+struct aarch64_features {
+    // has_neon not needed, aarch64 has NEON guaranteed
+    bool has_dotprod = false;
+    bool has_fp16_va = false;
+    bool has_sve     = false;
+    bool has_sve2    = false;
+    bool has_i8mm    = false;
+    bool has_sme     = false;
+
+    aarch64_features() {
+#if defined(__linux__)
+        // getauxval() returns unsigned long; keep the full width instead of
+        // truncating to 32 bits so that higher capability bits are not lost
+        const unsigned long hwcap  = getauxval(AT_HWCAP);
+        const unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+        has_dotprod = (hwcap  & HWCAP_ASIMDDP) != 0;
+        has_fp16_va = (hwcap  & HWCAP_FPHP)    != 0;
+        has_sve     = (hwcap  & HWCAP_SVE)     != 0;
+        has_sve2    = (hwcap2 & HWCAP2_SVE2)   != 0;
+        has_i8mm    = (hwcap2 & HWCAP2_I8MM)   != 0;
+        has_sme     = (hwcap2 & HWCAP2_SME)    != 0;
+#elif defined(__APPLE__)
+        has_dotprod = sysctl_flag("hw.optional.arm.FEAT_DotProd");
+        has_fp16_va = sysctl_flag("hw.optional.arm.FEAT_FP16");
+        has_i8mm    = sysctl_flag("hw.optional.arm.FEAT_I8MM");
+        has_sme     = sysctl_flag("hw.optional.arm.FEAT_SME");
+
+        // Apple apparently does not implement SVE yet
+#endif
+    }
+
+#if defined(__APPLE__)
+private:
+    // Reads an integer sysctl by name; a failed or unknown key counts as "not supported"
+    static bool sysctl_flag(const char * name) {
+        int    value = 0;
+        size_t size  = sizeof(value);
+
+        if (sysctlbyname(name, &value, &size, nullptr, 0) != 0) {
+            return false;
+        }
+        return value != 0;
+    }
+#endif
+};
+
+// Scores this CPU backend variant for the machine it is running on.
+// Returns 0 when the variant was compiled with a feature (GGML_USE_*) that the
+// CPU does not report; otherwise returns 1 plus a distinct power-of-two weight
+// for every compiled-in feature.
+static int ggml_backend_cpu_aarch64_score() {
+    const aarch64_features cpu;
+
+    struct requirement {
+        bool present; // reported by the running CPU
+        int  weight;  // contribution to the score
+    };
+
+    // Only the features this variant was compiled with end up in the table
+    const requirement required[] = {
+        { true, 1 }, // baseline, always satisfied
+#ifdef GGML_USE_DOTPROD
+        { cpu.has_dotprod, 1 << 1 },
+#endif
+#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
+        { cpu.has_fp16_va, 1 << 2 },
+#endif
+#ifdef GGML_USE_SVE
+        { cpu.has_sve, 1 << 3 },
+#endif
+#ifdef GGML_USE_MATMUL_INT8
+        { cpu.has_i8mm, 1 << 4 },
+#endif
+#ifdef GGML_USE_SVE2
+        { cpu.has_sve2, 1 << 5 },
+#endif
+#ifdef GGML_USE_SME
+        { cpu.has_sme, 1 << 6 },
+#endif
+    };
+
+    int total = 0;
+    for (const requirement & req : required) {
+        if (!req.present) {
+            return 0;
+        }
+        total += req.weight;
+    }
+
+    return total;
+}
+
+GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
+
+# endif // defined(__aarch64__)