From: anzz1
Date: Tue, 28 Mar 2023 19:44:29 +0000 (+0300)
Subject: Enable Fused-Multiply-Add (FMA) and F16C/CVT16 vector extensions on MSVC (#375)
X-Git-Tag: gguf-v0.4.0~1069
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5a5f8b1501fbb34367225544010ddfc306d6d2fe;p=pkg%2Fggml%2Fsources%2Fllama.cpp

Enable Fused-Multiply-Add (FMA) and F16C/CVT16 vector extensions on MSVC (#375)

* Enable Fused-Multiply-Add (FMA) instructions on MSVC

__FMA__ macro does not exist in MSVC

* Enable F16C/CVT16 vector extensions on MSVC

__F16C__ macro does not exist in MSVC, but is implied with AVX2/AVX512

* MSVC cvt intrinsics

* Add __SSE3__ macro for MSVC too because why not

even though it's not currently used for anything when AVX is defined
---

diff --git a/ggml.c b/ggml.c
index 222d199b..efe9316b 100644
--- a/ggml.c
+++ b/ggml.c
@@ -79,6 +79,19 @@ static int sched_yield (void) {
 typedef void* thread_ret_t;
 #endif
 
+// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
+#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
+#ifndef __FMA__
+#define __FMA__
+#endif
+#ifndef __F16C__
+#define __F16C__
+#endif
+#ifndef __SSE3__
+#define __SSE3__
+#endif
+#endif
+
 #ifdef __HAIKU__
 #define static_assert(cond, msg) _Static_assert(cond, msg)
 #endif
@@ -172,8 +185,13 @@ typedef double ggml_float;
 
 #ifdef __F16C__
 
+#ifdef _MSC_VER
+#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
+#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
+#else
 #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
 #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
+#endif
 
 #elif defined(__POWER9_VECTOR__)
 