#define GGML_ALIGNED_FREE(ptr) free(ptr)
#endif
-#define UNUSED(x) (void)(x)
+#define UNUSED GGML_UNUSED // short alias; GGML_UNUSED is defined elsewhere (presumably ggml.h - confirm)
#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0) // exchange x and y via a temporary of type T (the temporary is itself named SWAP); x and y are each expanded twice, so pass plain lvalues without side effects
+//
+// tensor access macros
+//
+
+#define GGML_TENSOR_UNARY_OP_LOCALS /* local shorthands for a one-input op; needs `src0` and `dst` in scope; GGML_TENSOR_LOCALS is defined elsewhere */ \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); /* ne00..ne03 <- src0->ne[0..3], as in the hand-written declarations this replaces */ \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); /* nb00..nb03 <- src0->nb[0..3] */ \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); /* ne0..ne3 <- dst->ne[0..3] */ \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb); /* nb0..nb3 <- dst->nb[0..3]; comments here must be block comments, a line comment would swallow the continuation */
+
+#define GGML_TENSOR_BINARY_OP_LOCALS /* local shorthands for a two-input op; needs `src0`, `src1` and `dst` in scope; GGML_TENSOR_LOCALS is defined elsewhere */ \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); /* ne00..ne03 <- src0->ne[0..3], as in the hand-written declarations this replaces */ \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); /* nb00..nb03 <- src0->nb[0..3] */ \
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); /* ne10..ne13 <- src1->ne[0..3] */ \
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); /* nb10..nb13 <- src1->nb[0..3] */ \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); /* ne0..ne3 <- dst->ne[0..3] */ \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb); /* nb0..nb3 <- dst->nb[0..3]; several call sites previously declared these strides as int, now size_t */
+
#if defined(GGML_USE_ACCELERATE)
#include <Accelerate/Accelerate.h>
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
return;
}
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
return;
}
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
}
const int nr = ggml_nrows(src0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
- GGML_ASSERT(dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const enum ggml_type type = src0->type;
dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q;
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
// src0 and dst as viewed during acc
const size_t nb0 = ggml_element_size(src0);
}
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int64_t nr = ggml_nrows(src0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
}
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
assert(ggml_is_scalar(dst));
assert(src0->nb[0] == sizeof(float));
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
ggml_float sum = 0;
ggml_float row_sum = 0;
GGML_ASSERT(src0->nb[0] == sizeof(float));
GGML_ASSERT(dst->nb[0] == sizeof(float));
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT(ne0 == 1);
GGML_ASSERT(ne1 == ne01);
GGML_ASSERT(ne2 == ne02);
GGML_ASSERT(ne3 == ne03);
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
for (int64_t i3 = 0; i3 < ne03; i3++) {
for (int64_t i2 = 0; i2 < ne02; i2++) {
for (int64_t i1 = 0; i1 < ne01; i1++) {
assert(src0->nb[0] == sizeof(float));
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
assert(ne0 == 1);
assert(ne1 == ne01);
UNUSED(ne2);
UNUSED(ne3);
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
for (int64_t i03 = 0; i03 < ne03; i03++) {
for (int64_t i02 = 0; i02 < ne02; i02++) {
for (int64_t i01 = 0; i01 < ne01; i01++) {
return;
}
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne0/ne00);
return;
}
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne00/ne0);
const int ith = params->ith;
const int nth = params->nth;
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const float eps = 1e-5f; // TODO: make this a parameter
const int ith = params->ith;
const int nth = params->nth;
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const float eps = 1e-6f; // TODO: make this a parameter
const int ith = params->ith;
const int nth = params->nth;
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const float eps = 1e-6f; // TODO: make this a parameter
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- const int64_t ne10 = src1->ne[0];
-#endif
- const int64_t ne11 = src1->ne[1];
-#ifndef NDEBUG
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nb00 = src0->nb[0];
-#endif
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
-#ifndef NDEBUG
- const int nb10 = src1->nb[0];
-#endif
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
//const int64_t ne = ne0*ne1*ne2*ne3;
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
-
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
enum ggml_type const vec_dot_type = quantize_fns[type].vec_dot_type;
// we don't support permuted src0 or src1
- GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
+ GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]);
GGML_ASSERT(nb10 == sizeof(float));
// dst cannot be transposed or permuted
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- //const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
// src0 and dst as viewed during set
const size_t nb0 = ggml_element_size(src0);
// TODO: handle transposed/permuted matrices
- const int ne00 = src0->ne[0];
- const int ne01 = src0->ne[1];
- const int ne02 = src0->ne[2];
- const int ne03 = src0->ne[3];
- const int ne0 = dst->ne[0];
- const int ne1 = dst->ne[1];
- const int ne2 = dst->ne[2];
- const int ne3 = dst->ne[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
+
GGML_ASSERT(ne00 == ne0);
GGML_ASSERT(ne00 == ne1);
GGML_ASSERT(ne01 == 1);
GGML_ASSERT(ne02 == ne2);
GGML_ASSERT(ne03 == ne3);
- const int nb00 = src0->nb[0];
- //const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
-
GGML_ASSERT(nb00 == sizeof(float));
GGML_ASSERT(nb0 == sizeof(float));
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int ne00 = src0->ne[0];
- const int ne01 = src0->ne[1];
- const int ne02 = src0->ne[2];
- //const int ne03 = src0->ne[3];
-
- const int ne10 = src1->ne[0];
- //const int ne11 = src1->ne[1];
- const int ne12 = src1->ne[2];
- //const int ne13 = src1->ne[3];
-
- const int ne0 = dst->ne[0];
- const int ne1 = dst->ne[1];
- const int ne2 = dst->ne[2];
- //const int ne3 = dst->ne[3];
- //const int ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- //const int nb01 = src0->nb[1];
- //const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- //const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- //const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t neq0 = q->ne[0];
- const int64_t neq1 = q->ne[1];
- const int64_t neq2 = q->ne[2];
- const int64_t neq3 = q->ne[3];
-
- const int64_t nek0 = k->ne[0];
- const int64_t nek1 = k->ne[1];
- //const int64_t nek2 = k->ne[2];
- //const int64_t nek3 = k->ne[3];
-
- //const int64_t nev0 = v->ne[0];
- const int64_t nev1 = v->ne[1];
- //const int64_t nev2 = v->ne[2];
- //const int64_t nev3 = v->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
-
- const int nbk0 = k->nb[0];
- const int nbk1 = k->nb[1];
- const int nbk2 = k->nb[2];
- const int nbk3 = k->nb[3];
-
- const int nbq0 = q->nb[0];
- const int nbq1 = q->nb[1];
- const int nbq2 = q->nb[2];
- const int nbq3 = q->nb[3];
-
- const int nbv0 = v->nb[0];
- const int nbv1 = v->nb[1];
- const int nbv2 = v->nb[2];
- const int nbv3 = v->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t neq0 = q->ne[0];
- const int64_t neq1 = q->ne[1];
- const int64_t neq2 = q->ne[2];
- const int64_t neq3 = q->ne[3];
-
- const int64_t nek0 = k->ne[0];
- const int64_t nek1 = k->ne[1];
- //const int64_t nek2 = k->ne[2];
- //const int64_t nek3 = k->ne[3];
-
- //const int64_t nev0 = v->ne[0];
- const int64_t nev1 = v->ne[1];
- //const int64_t nev2 = v->ne[2];
- //const int64_t nev3 = v->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
-
- const int nbk0 = k->nb[0];
- const int nbk1 = k->nb[1];
- const int nbk2 = k->nb[2];
- const int nbk3 = k->nb[3];
-
- const int nbq0 = q->nb[0];
- const int nbq1 = q->nb[1];
- const int nbq2 = q->nb[2];
- const int nbq3 = q->nb[3];
-
- const int nbv0 = v->nb[0];
- const int nbv1 = v->nb[1];
- const int nbv2 = v->nb[2];
- const int nbv3 = v->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t nea0 = a->ne[0];
- const int64_t nea1 = a->ne[1];
- const int64_t nea2 = a->ne[2];
- const int64_t nea3 = a->ne[3];
-
- const int64_t neb00 = b0->ne[0];
- const int64_t neb01 = b0->ne[1];
- //const int64_t neb02 = b0->ne[2];
- //const int64_t neb03 = b0->ne[3];
-
- const int64_t neb10 = b1->ne[0];
- const int64_t neb11 = b1->ne[1];
- //const int64_t neb12 = b1->ne[2];
- //const int64_t neb13 = b1->ne[3];
-
- const int64_t nec00 = c0->ne[0];
- const int64_t nec01 = c0->ne[1];
- //const int64_t nec02 = c0->ne[2];
- //const int64_t nec03 = c0->ne[3];
-
- const int64_t nec10 = c1->ne[0];
- const int64_t nec11 = c1->ne[1];
- //const int64_t nec12 = c1->ne[2];
- //const int64_t nec13 = c1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
-
- const int nba0 = a->nb[0];
- const int nba1 = a->nb[1];
- const int nba2 = a->nb[2];
- const int nba3 = a->nb[3];
-
- const int nbb00 = b0->nb[0];
- const int nbb01 = b0->nb[1];
- const int nbb02 = b0->nb[2];
- const int nbb03 = b0->nb[3];
-
- const int nbb10 = b1->nb[0];
- //const int nbb11 = b1->nb[1];
- //const int nbb12 = b1->nb[2];
- //const int nbb13 = b1->nb[3];
-
- const int nbc00 = c0->nb[0];
- const int nbc01 = c0->nb[1];
- const int nbc02 = c0->nb[2];
- const int nbc03 = c0->nb[3];
-
- const int nbc10 = c1->nb[0];
- //const int nbc11 = c1->nb[1];
- //const int nbc12 = c1->nb[2];
- //const int nbc13 = c1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, nea, a, ne);
+ GGML_TENSOR_LOCALS(size_t, nba, a, nb);
+ GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne);
+ GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb);
+ GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne);
+ GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb);
+ GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne);
+ GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb);
+ GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne);
+ GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t neq0 = q->ne[0];
- const int64_t neq1 = q->ne[1];
- const int64_t neq2 = q->ne[2];
- const int64_t neq3 = q->ne[3];
-
- const int64_t nek0 = k->ne[0];
- const int64_t nek1 = k->ne[1];
- //const int64_t nek2 = k->ne[2];
- //const int64_t nek3 = k->ne[3];
-
- const int64_t nev0 = v->ne[0];
- const int64_t nev1 = v->ne[1];
- //const int64_t nev2 = v->ne[2];
- //const int64_t nev3 = v->ne[3];
-
- const int64_t ned0 = d->ne[0];
- const int64_t ned1 = d->ne[1];
- //const int64_t ned2 = d->ne[2];
- //const int64_t ned3 = d->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nbk0 = k->nb[0];
- const int nbk1 = k->nb[1];
- const int nbk2 = k->nb[2];
- const int nbk3 = k->nb[3];
-
- const int nbq0 = q->nb[0];
- const int nbq1 = q->nb[1];
- const int nbq2 = q->nb[2];
- const int nbq3 = q->nb[3];
-
- const int nbv0 = v->nb[0];
- const int nbv1 = v->nb[1];
- const int nbv2 = v->nb[2];
- const int nbv3 = v->nb[3];
-
- const int nbd0 = d->nb[0];
- const int nbd1 = d->nb[1];
- const int nbd2 = d->nb[2];
- const int nbd3 = d->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
+ GGML_TENSOR_LOCALS(int64_t, ned, d, ne);
+ GGML_TENSOR_LOCALS(size_t, nbd, d, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
return;
}
- const int64_t ne00 = src0->ne[0]; UNUSED(ne00);
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3]; UNUSED(ne03);
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3]; UNUSED(ne3);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
const int32_t nep0 = ((const int32_t *)(opt0->data))[0];
const int32_t nep1 = ((const int32_t *)(opt0->data))[1];
return;
}
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
const int32_t w = ((const int32_t *)(opt0->data))[0];