option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
+if (APPLE)
+ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
+else()
+ option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
+endif()
+
option(WHISPER_PERF "whisper: enable perf timings" OFF)
-option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
# sanitizers
endif()
endif()
+if (WHISPER_SUPPORT_OPENBLAS)
+    # Locate the OpenBLAS library; when found, link it and define
+    # GGML_USE_OPENBLAS so ggml.c enables its cblas_sgemm code path.
+    # A missing OpenBLAS is deliberately non-fatal: the flag is simply
+    # not set and the build falls back to the non-BLAS implementation.
+    find_library(OPENBLAS_LIB openblas)
+    if (OPENBLAS_LIB)
+        message(STATUS "OpenBLAS found")
+
+        list(APPEND WHISPER_EXTRA_LIBS  ${OPENBLAS_LIB})
+        list(APPEND WHISPER_EXTRA_FLAGS -DGGML_USE_OPENBLAS)
+    else()
+        message(WARNING "OpenBLAS not found")
+    endif()
+endif()
+
# compiler flags
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
#ifdef GGML_USE_ACCELERATE
#include <Accelerate/Accelerate.h>
+#elif defined(GGML_USE_OPENBLAS)
+#include <cblas.h>
#endif
// floating point type used to accumulate sums
// nb00 < nb01 - src0 is transposed
// compute by src0 columns
-//#ifdef GGML_USE_ACCELERATE
-// if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
-// GGML_ASSERT(ggml_is_contiguous(src0));
-// GGML_ASSERT(nb10 == sizeof(float));
-//
-// if (params->ith != 0) return;
-//
-// if (params->type == GGML_TASK_INIT) {
-// return;
-// }
-//
-// if (params->type == GGML_TASK_FINALIZE) {
-// return;
-// }
-//
-// float * const wdata = params->wdata;
-//
-// for (int i03 = 0; i03 < ne03; i03++) {
-// for (int i02 = 0; i02 < ne02; i02++) {
-// const float * x = (float *) (src0->data);
-// const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
-//
-// float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
-//
-// // zT = y * xT
-// {
-// cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
-// ne11, ne01, ne10,
-// 1.0f, y, ne10,
-// x, ne10,
-// 0.0f, d, ne01);
-// }
-// }
-// }
-//
-// //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-//
-// return;
-// }
-//#endif
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+ if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
+ GGML_ASSERT(ggml_is_contiguous(src0));
+ GGML_ASSERT(nb10 == sizeof(float));
+
+ if (params->ith != 0) return;
+
+ if (params->type == GGML_TASK_INIT) {
+ return;
+ }
+
+ if (params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ for (int i03 = 0; i03 < ne03; i03++) {
+ for (int i02 = 0; i02 < ne02; i02++) {
+ const float * x = (float *) (src0->data);
+ const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
+
+ float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
+
+ // zT = y * xT
+ {
+ cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+ ne11, ne01, ne10,
+ 1.0f, y, ne10,
+ x, ne10,
+ 0.0f, d, ne01);
+ }
+ }
+ }
+
+ //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+
+ return;
+ }
+#endif
if (params->type == GGML_TASK_INIT) {
if (nb01 >= nb00) {
// nb00 < nb01 - src0 is transposed
// compute by src0 columns
-#ifdef GGML_USE_ACCELERATE
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
GGML_ASSERT(nb10 == sizeof(float));
} else {
if (node->src0->type == GGML_TYPE_F16 &&
node->src1->type == GGML_TYPE_F32) {
-#ifdef GGML_USE_ACCELERATE
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
} else {
}
int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
return 1;
#else
return 0;