//
#define GGML_TENSOR_UNARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#define GGML_TENSOR_BINARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); \
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
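Each GGML_TENSOR_LOCALS invocation already expands to semicolon-terminated declarations, so the extra ';' separators inside these convenience macros, and the ';' after their call sites later in the patch, only leave empty statements behind, which pedantic builds (e.g. clang's -Wextra-semi-stmt) warn about; that is presumably why they are dropped. A minimal, self-contained sketch of the pattern, using simplified stand-in names rather than the real ggml definitions:

    /* Simplified stand-ins for illustration only -- not the real ggml macros. */
    #include <stdint.h>
    #include <stdio.h>

    struct tensor { int64_t ne[4]; };                    /* stand-in for ggml_tensor */

    #define TENSOR_LOCAL(type, name, t, field) \
        const type name = (t)->field[0];                 /* expansion already ends in ';' */

    #define UNARY_OP_LOCALS(src, dst)        \
        TENSOR_LOCAL(int64_t, ne00, src, ne) \
        TENSOR_LOCAL(int64_t, ne0,  dst, ne)

    static void op(const struct tensor * src, const struct tensor * dst) {
        UNARY_OP_LOCALS(src, dst)  /* no trailing ';' -- it would be an empty statement */
        printf("%lld %lld\n", (long long) ne00, (long long) ne0);
    }

    int main(void) {
        struct tensor a = {{2, 3, 4, 5}};
        op(&a, &a);
        return 0;
    }

With the semicolons removed from both the macro bodies and the call sites, the expansion contains exactly one ';' per declaration.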
#if defined(GGML_USE_ACCELERATE)
#include <Accelerate/Accelerate.h>
#define GGML_F16x8_ADD vaddq_f16
#define GGML_F16x8_MUL vmulq_f16
#define GGML_F16x8_REDUCE(res, x) \
- { \
+ do { \
int offset = GGML_F16_ARR >> 1; \
for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f16(x[i], x[offset+i]); \
const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \
const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \
res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \
- }
+ } while (0)
#define GGML_F16_VEC GGML_F16x8
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
#define GGML_F32x8_ADD _mm256_add_ps
#define GGML_F32x8_MUL _mm256_mul_ps
#define GGML_F32x8_REDUCE(res, x) \
-{ \
+do { \
int offset = GGML_F32_ARR >> 1; \
for (int i = 0; i < offset; ++i) { \
x[i] = _mm256_add_ps(x[i], x[offset+i]); \
const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
                             _mm256_extractf128_ps(x[0], 1)); \
const __m128 t1 = _mm_hadd_ps(t0, t0); \
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
-}
+} while (0)
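The brace-to-do/while changes follow the standard C idiom for statement-like macros: a bare { ... } block plus the caller's ';' leaves a stray empty statement, which breaks if/else chaining and trips the same pedantic warnings, whereas do { ... } while (0) expands to a single statement that consumes the trailing semicolon. A small sketch with hypothetical REDUCE macros, not the ggml ones:

    #include <stdio.h>

    #define REDUCE_BRACES(res, x)   { (res) = (x)[0] + (x)[1]; }
    #define REDUCE_DO_WHILE(res, x) do { (res) = (x)[0] + (x)[1]; } while (0)

    int main(void) {
        float v[2] = {1.0f, 2.0f};
        float r = 0.0f;

        /* if (v[0] > 0.0f) REDUCE_BRACES(r, v); else r = -1.0f;
           would not compile: the ';' after the expanded block detaches the else. */
        if (v[0] > 0.0f)
            REDUCE_DO_WHILE(r, v);   /* one statement, so the if/else pairs correctly */
        else
            r = -1.0f;

        printf("%f\n", (double) r);  /* prints 3.000000 */
        return 0;
    }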
// TODO: is this optimal ?
#define GGML_F32_VEC GGML_F32x8
{
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
return ((int8_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
return ((int16_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
return ((int32_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_F16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
- } break;
+ }
case GGML_TYPE_F32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(float));
return ((float *)(tensor->data))[i];
- } break;
+ }
default:
{
GGML_ASSERT(false);
- } break;
+ }
}
return 0.0f;
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
switch (tensor->type) {
case GGML_TYPE_I8:
- {
- return ((int8_t *) data)[0];
- } break;
+ return ((int8_t *) data)[0];
case GGML_TYPE_I16:
- {
- return ((int16_t *) data)[0];
- } break;
+ return ((int16_t *) data)[0];
case GGML_TYPE_I32:
- {
- return ((int32_t *) data)[0];
- } break;
+ return ((int32_t *) data)[0];
case GGML_TYPE_F16:
- {
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
- } break;
+ return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
case GGML_TYPE_F32:
- {
- return ((float *) data)[0];
- } break;
+ return ((float *) data)[0];
default:
- {
- GGML_ASSERT(false);
- } break;
+ GGML_ASSERT(false);
}
return 0.0f;
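The switch clean-ups above remove a break; that immediately follows an unconditional return (dead code that tools such as clang's -Wunreachable-code-break flag) and, where the case body is a single return, drop the now-pointless braces; the trailing return 0.0f; stays so every path still returns a value. A minimal sketch of the resulting shape, using a hypothetical get_i32 helper rather than the real accessor:

    #include <stdint.h>
    #include <assert.h>
    #include <stdio.h>

    enum kind { KIND_I8, KIND_I32 };

    static int32_t get_i32(enum kind k, const void * data) {
        switch (k) {
            case KIND_I8:
                return ((const int8_t  *) data)[0];   /* no braces, no unreachable break */
            case KIND_I32:
                return ((const int32_t *) data)[0];
            default:
                assert(!"invalid kind");
        }
        return 0;  /* keeps every path returning a value when assertions compile out */
    }

    int main(void) {
        const int32_t v = 42;
        printf("%d\n", get_i32(KIND_I32, &v));  /* prints 42 */
        return 0;
    }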
{
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
return ((int8_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
return ((int16_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
return ((int32_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_F16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
- } break;
+ }
case GGML_TYPE_F32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(float));
return ((float *)(tensor->data))[i];
- } break;
+ }
default:
{
GGML_ASSERT(false);
- } break;
+ }
}
return 0.0f;
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
switch (tensor->type) {
case GGML_TYPE_I8:
- {
- return ((int8_t *) data)[0];
- } break;
+ return ((int8_t *) data)[0];
case GGML_TYPE_I16:
- {
- return ((int16_t *) data)[0];
- } break;
+ return ((int16_t *) data)[0];
case GGML_TYPE_I32:
- {
- return ((int32_t *) data)[0];
- } break;
+ return ((int32_t *) data)[0];
case GGML_TYPE_F16:
- {
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
- } break;
+ return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
case GGML_TYPE_F32:
- {
- return ((float *) data)[0];
- } break;
+ return ((float *) data)[0];
default:
- {
- GGML_ASSERT(false);
- } break;
+ GGML_ASSERT(false);
}
return 0.0f;
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const enum ggml_type type = src0->type;
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
// src0 and dst as viewed during acc
const size_t nb0 = ggml_element_size(src0);
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int64_t nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
assert(ggml_is_scalar(dst));
assert(src0->nb[0] == sizeof(float));
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
ggml_float sum = 0;
ggml_float row_sum = 0;
assert(src0->nb[0] == sizeof(ggml_fp16_t));
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
float sum = 0;
float row_sum = 0;
GGML_ASSERT(src0->nb[0] == sizeof(float));
GGML_ASSERT(dst->nb[0] == sizeof(float));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(ne0 == 1);
GGML_ASSERT(ne1 == ne01);
assert(src0->nb[0] == sizeof(float));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
assert(ne0 == 1);
assert(ne1 == ne01);
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne0/ne00);
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne00/ne0);
const int ith = params->ith;
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
// TODO: support for transposed / permuted tensors
GGML_ASSERT(nb0 == sizeof(float));
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const float eps = 1e-6f; // TODO: make this a parameter
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
// int64_t t0 = ggml_perf_time_us();
// UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
// src0 and dst as viewed during set
const size_t nb0 = ggml_element_size(src0);
// TODO: handle transposed/permuted matrices
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(ne00 == ne0);
GGML_ASSERT(ne00 == ne1);
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
const int n_dims = ((int32_t *) dst->op_params)[1];
const int mode = ((int32_t *) dst->op_params)[2];
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
} else {
GGML_ASSERT(false); // only stride 1 and 2 supported
- };
+ }
}
// ggml_compute_forward_conv_2d
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
const int ith = params->ith;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int scale_factor = dst->op_params[0];
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
- GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
- GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
- GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
- GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
- GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
- GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
- GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
- GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
- GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
- GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, nea, a, ne);
- GGML_TENSOR_LOCALS(size_t, nba, a, nb);
- GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne);
- GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb);
- GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne);
- GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb);
- GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne);
- GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb);
- GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne);
- GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, nea, a, ne)
+ GGML_TENSOR_LOCALS(size_t, nba, a, nb)
+ GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne)
+ GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb)
+ GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne)
+ GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb)
+ GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne)
+ GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb)
+ GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne)
+ GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
- GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
- GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
- GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
- GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
- GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
- GGML_TENSOR_LOCALS(int64_t, ned, d, ne);
- GGML_TENSOR_LOCALS(size_t, nbd, d, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
+ GGML_TENSOR_LOCALS(int64_t, ned, d, ne)
+ GGML_TENSOR_LOCALS(size_t, nbd, d, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
return;
}
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
return;
}
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
const int32_t w = ((const int32_t *)(dst->op_params))[0];
// ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int64_t w = ne1;
(*step) *= width;
}
- return GGML_LINESEARCH_FAIL;
+ GGML_UNREACHABLE();
}
static enum ggml_opt_result ggml_opt_lbfgs(
step[0] = 1.0;
}
- return GGML_OPT_DID_NOT_CONVERGE;
+ GGML_UNREACHABLE();
}
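In both hunks above, the return after the loop can never execute (the loop only exits through the return statements inside it), so it is replaced with GGML_UNREACHABLE(), which states that intent instead of returning a misleading status code; the macro is defined in ggml.h and presumably asserts or emits a compiler unreachable hint. A hedged sketch of the pattern with a stand-in macro:

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for illustration; the real GGML_UNREACHABLE() lives in ggml.h
       and may be implemented differently. */
    #if defined(__GNUC__)
    #   define MY_UNREACHABLE() __builtin_unreachable()
    #else
    #   define MY_UNREACHABLE() abort()
    #endif

    /* Every iteration either returns or keeps searching, so the point after
       the loop is never reached. */
    static int search(int x) {
        for (;;) {
            if (x % 7 == 0) return x;
            ++x;
        }
        MY_UNREACHABLE();  /* replaces a bogus "return <error code>;" after the loop */
    }

    int main(void) {
        printf("%d\n", search(10));  /* prints 14 */
        return 0;
    }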
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
- };
+ }
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
- };
+ }
if (!ok) {
break;
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
- };
+ }
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
- };
+ }
}
// write tensor infos