Avoid using __fp16 on ARM with old nvcc (llama/10616)

author Frankie Robertson <redacted>

Wed, 4 Dec 2024 00:41:37 +0000 (02:41 +0200)

committer Georgi Gerganov <redacted>

Thu, 5 Dec 2024 12:27:39 +0000 (14:27 +0200)
author Frankie Robertson <redacted>
Wed, 4 Dec 2024 00:41:37 +0000 (02:41 +0200)
committer Georgi Gerganov <redacted>
Thu, 5 Dec 2024 12:27:39 +0000 (14:27 +0200)
diff --git a/src/ggml-impl.h b/src/ggml-impl.h

index 78e3af8f2971cb0c30378e6ec95c1599018e8ade..00a1546a7d5d0d39ab6e192804e4c158f38becd2 100644 (file)
--- a/src/ggml-impl.h
+++ b/src/ggml-impl.h
@@ -310,14 +310,14 @@ void ggml_aligned_free(void * ptr, size_t size);
  // FP16 to FP32 conversion
  
  #if defined(__ARM_NEON)
-    #ifdef _MSC_VER
+    #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
          typedef uint16_t ggml_fp16_internal_t;
      #else
          typedef __fp16 ggml_fp16_internal_t;
      #endif
  #endif
  
-#if defined(__ARM_NEON) && !defined(_MSC_VER)
+#if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
      #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
      #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
author	Frankie Robertson <redacted>
	Wed, 4 Dec 2024 00:41:37 +0000 (02:41 +0200)
committer	Georgi Gerganov <redacted>
	Thu, 5 Dec 2024 12:27:39 +0000 (14:27 +0200)