endif # GGML_OPENBLAS64
ifdef GGML_BLIS
- MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
+ MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
MK_LDFLAGS += -lblis -L/usr/local/lib
OBJ_GGML += ggml/src/ggml-blas.o
endif # GGML_BLIS
+ifdef GGML_NVPL
+ MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
+ MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
+ OBJ_GGML += ggml/src/ggml-blas.o
+endif # GGML_NVPL
+
ifndef GGML_NO_LLAMAFILE
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
OBJ_GGML += ggml/src/llamafile/sgemm.o
# include <Accelerate/Accelerate.h>
#elif defined(GGML_BLAS_USE_MKL)
# include <mkl.h>
+#elif defined(GGML_BLAS_USE_BLIS)
+# include <blis.h>
+#elif defined(GGML_BLAS_USE_NVPL)
+# include <nvpl_blas.h>
#else
# include <cblas.h>
-# ifdef BLIS_ENABLE_CBLAS
-# include <blis.h>
-# endif
#endif
struct ggml_backend_blas_context {
openblas_set_num_threads(ctx->n_threads);
#endif
-#if defined(BLIS_ENABLE_CBLAS)
+#if defined(GGML_BLAS_USE_BLIS)
bli_thread_set_num_threads(ctx->n_threads);
#endif
+#if defined(GGML_BLAS_USE_NVPL)
+ nvpl_blas_set_num_threads(ctx->n_threads);
+#endif
+
for (int64_t i13 = 0; i13 < ne13; i13++) {
for (int64_t i12 = 0; i12 < ne12; i12++) {
const int64_t i03 = i13/r3;