cuda : fix jetson compile error (#4560)
author    FantasyGmm <redacted>
Fri, 22 Dec 2023 15:11:12 +0000 (23:11 +0800)
committer GitHub <redacted>
Fri, 22 Dec 2023 15:11:12 +0000 (17:11 +0200)
* fix old jetson compile error

* Update Makefile

* update jetson detection and cuda version detection

* update cuda macro define

* update makefile and cuda, fix some issues

* Update README.md

Co-authored-by: Georgi Gerganov <redacted>
* Update Makefile

* Update README.md

---------

Co-authored-by: Georgi Gerganov <redacted>
Makefile
README.md
ggml-cuda.cu
ggml-quants.c

index 42686ce7147da3afdb260e1ebe48b4d45d9a8809..6a998091be5494829d04fe5f290e168b28b46863 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -282,8 +282,17 @@ endif
 ifneq ($(filter aarch64%,$(UNAME_M)),)
        # Apple M1, M2, etc.
        # Raspberry Pi 3, 4, Zero 2 (64-bit)
+       # Nvidia Jetson
        MK_CFLAGS   += -mcpu=native
        MK_CXXFLAGS += -mcpu=native
+       JETSON_RELEASE_INFO = $(shell jetson_release)
+       ifdef JETSON_RELEASE_INFO
+               ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
+                       JETSON_EOL_MODULE_DETECT = 1
+                       CC = aarch64-unknown-linux-gnu-gcc
+                       CXX = aarch64-unknown-linux-gnu-g++
+               endif
+       endif
 endif
 
 ifneq ($(filter armv6%,$(UNAME_M)),)
@@ -357,10 +366,13 @@ ifdef LLAMA_BLIS
 endif # LLAMA_BLIS
 
 ifdef LLAMA_CUBLAS
-       MK_CPPFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
-       MK_LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
+       MK_CPPFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
+       MK_LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib
        OBJS         += ggml-cuda.o
-       MK_NVCCFLAGS  = --forward-unknown-to-host-compiler -use_fast_math
+       MK_NVCCFLAGS  = -use_fast_math
+ifndef JETSON_EOL_MODULE_DETECT
+       MK_NVCCFLAGS += --forward-unknown-to-host-compiler
+endif # JETSON_EOL_MODULE_DETECT
 
 ifdef LLAMA_DEBUG
        MK_NVCCFLAGS += -lineinfo
@@ -417,7 +429,11 @@ ifdef LLAMA_CUDA_CCBIN
        MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
+ifdef JETSON_EOL_MODULE_DETECT
+       $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
+else
        $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
+endif # JETSON_EOL_MODULE_DETECT
 endif # LLAMA_CUBLAS
 
 ifdef LLAMA_CLBLAST
index 377d3928bdacb106620c52ec10465fd6fd3ae571..649c3b3334387e4cba29ad954beee376afeb33de 100644 (file)
--- a/README.md
+++ b/README.md
@@ -396,6 +396,9 @@ Building the program with BLAS support may lead to some performance improvements
 - #### cuBLAS
 
   This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
+
+  For Jetson users: if you have a Jetson Orin, you can try this: [Official Support](https://www.jetson-ai-lab.com/tutorial_text-generation.html). If you are using an older module (Nano/TX2), some additional steps are needed before compiling.
+
   - Using `make`:
     ```bash
     make LLAMA_CUBLAS=1
index da8fd1e09c7be35e3fdc390b4480dd634bbaea5f..b124774a9336020cbc25d4add64a3403321df1aa 100644 (file)
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
 #include <cuda_runtime.h>
 #include <cublas_v2.h>
 #include <cuda_fp16.h>
+// CUDA 10.2 does not have these macro definitions.
+#ifndef CUBLAS_TF32_TENSOR_OP_MATH
+#define CUBLAS_TF32_TENSOR_OP_MATH CUBLAS_TENSOR_OP_MATH
+#define CUBLAS_COMPUTE_16F CUDA_R_16F
+#define CUBLAS_COMPUTE_32F CUDA_R_32F
+#define cublasComputeType_t cudaDataType_t
+#endif
 #endif // defined(GGML_USE_HIPBLAS)
 
 #include "ggml-cuda.h"
index 0e8163a16b39549671363ac859cad2a7e0aaeefa..a15a240487084c6850d30a16882a2acf4b9f9df6 100644 (file)
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -3677,7 +3677,7 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
 
         const uint8x16_t mins = vshrq_n_u8(mins_and_scales, 4);
         const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums);
-        const ggml_int16x8x2_t mins16 = {vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))};
+        const ggml_int16x8x2_t mins16 = {{vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))}};
         const int32x4_t s0 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[0]), vget_low_s16 (q8sums.val[0])),
                                        vmull_s16(vget_high_s16(mins16.val[0]), vget_high_s16(q8sums.val[0])));
         const int32x4_t s1 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[1]), vget_low_s16 (q8sums.val[1])),
@@ -6626,7 +6626,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
 
         const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums);
         const int8x16_t scales = vld1q_s8(scale);
-        const ggml_int16x8x2_t q6scales = {vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))};
+        const ggml_int16x8x2_t q6scales = {{vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))}};
 
         const int32x4_t prod = vaddq_s32(vaddq_s32(vmull_s16(vget_low_s16 (q8sums.val[0]), vget_low_s16 (q6scales.val[0])),
                                                    vmull_s16(vget_high_s16(q8sums.val[0]), vget_high_s16(q6scales.val[0]))),
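
On the usual NEON path, `ggml_int16x8x2_t` is the `arm_neon.h` type `int16x8x2_t`, a struct whose only member is an `int16x8_t val[2]` array, so a full initializer has one brace level for the struct and one for the array. The single-brace form relies on brace elision, which newer compilers accept but stricter or older toolchains reject, and the doubled braces sidestep that. A standalone sketch of the two forms (illustrative function name, not from the patch):

```cpp
#include <arm_neon.h>

// arm_neon.h defines (roughly): typedef struct { int16x8_t val[2]; } int16x8x2_t;
// so an initializer has a struct level and a nested array level.

int16x8x2_t widen_scales(const int8x16_t scales) {
    // Relies on brace elision; some older compilers reject this when the
    // elements are vector expressions:
    //   const int16x8x2_t q6scales = { vmovl_s8(vget_low_s8(scales)),
    //                                  vmovl_s8(vget_high_s8(scales)) };

    // Explicit form used by the patch: outer braces for the struct,
    // inner braces for the val[2] array.
    const int16x8x2_t q6scales = {{ vmovl_s8(vget_low_s8(scales)),
                                    vmovl_s8(vget_high_s8(scales)) }};
    return q6scales;
}
```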