#include <ggml.h>
#include <ggml-alloc.h>
#include <ggml-backend.h>
-#include <ggml-backend-impl.h>
#include <algorithm>
#include <array>
im = nullptr;
}
}
+
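+ // quantize the entire tensor as one chunk: size/ne[0] rows of ne[0] values each, passing the imatrix when the type requires one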
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
+ // TODO: other cases
+ //#pragma omp parallel for
+ //for (int i = 0; i < tensor->ne[1]; i++) {
+ // ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
+ // i * tensor->ne[0], 1, tensor->ne[0], im);
+ //}
+
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
// This is going to create some weird integers though.
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
tt.to_float(&buf[i], vq.data(), bs);
tv.insert(tv.end(), vq.begin(), vq.end());
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
}
test_dup(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 10, 1},
+ std::array<int64_t, 4> ne = {10, 10, 20, 1},
std::array<int64_t, 4> permute = {0, 0, 0, 0})
: type(type), ne(ne), permute(permute),
_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
const ggml_type type_src;
const ggml_type type_dst;
const std::array<int64_t, 4> ne;
+ const std::array<int64_t, 4> permute;
+ bool _src_use_permute;
std::string vars() override {
- return VARS_TO_STR3(type_src, type_dst, ne);
+ return VARS_TO_STR4(type_src, type_dst, ne, permute);
}
double max_nmse_err() override {
}
test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 10, 1})
- : type_src(type_src), type_dst(type_dst), ne(ne) {}
+ std::array<int64_t, 4> ne = {10, 10, 10, 1},
+ std::array<int64_t, 4> permute = {0, 0, 0, 0})
+ : type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
+ _src_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
- ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne.data());
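+ // permuting src produces a non-contiguous view, so the copy exercises the non-contiguous paths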
+ if (_src_use_permute) {
+ src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
+ }
+ ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, src->ne);
ggml_tensor * out = ggml_cpy(ctx, src, dst);
return out;
}
}
};
+
// GGML_OP_ROPE
struct test_rope : public test_case {
const ggml_type type;
}
};
+// GGML_OP_CONV_TRANSPOSE_1D
+struct test_conv_transpose_1d : public test_case {
+ const std::array<int64_t, 4> ne_input;
+ const std::array<int64_t, 4> ne_kernel;
+
+ const int s0; // stride
+ const int p0; // padding
+ const int d0; // dilation
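+ // output width: (input_width - 1)*s0 - 2*p0 + d0*(kernel_width - 1) + 1 (standard transposed-conv formula)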
+
+ std::string vars() override {
+ return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
+ }
+
+ test_conv_transpose_1d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1}, // [input_width, input_height, input_channels, 1]
+ std::array<int64_t, 4> ne_kernel = {16, 32, 32, 1}, // [kernel_width, kernel_height, input_channels, 1]
+ int s0 = 1, int p0 = 0, int d0 = 1)
+ : ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0) {}
+
+ ggml_tensor * build_graph(ggml_context * ctx) override {
+ ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+ ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+ ggml_tensor * out = ggml_conv_transpose_1d(ctx, kernel, input, s0, p0, d0);
+ return out;
+ }
+};
+
// GGML_OP_IM2COL
struct test_im2col : public test_case {
const ggml_type type_input;
// padding
const int p0;
const int p1;
- // dilatation
+ // dilation
const int d0;
const int d1;
// mode
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
}
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
const ggml_type type;
const std::array<int64_t, 4> ne;
const int32_t num_groups;
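+ // epsilon added to the variance for numerical stability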
+ const float eps;
std::string vars() override {
- return VARS_TO_STR3(type, ne, num_groups);
+ return VARS_TO_STR4(type, ne, num_groups, eps);
test_group_norm(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {64, 64, 320, 1},
- int32_t num_groups = 32)
- : type(type), ne(ne), num_groups(num_groups) {}
+ int32_t num_groups = 32,
+ float eps = 1e-6f)
+ : type(type), ne(ne), num_groups(num_groups), eps(eps) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
- ggml_tensor * out = ggml_group_norm(ctx, a, num_groups);
+ ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
return out;
}
};
GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+ GGML_TYPE_BF16,
};
// unary ops
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
+ // test cases for 1D im2col
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+
+ test_cases.emplace_back(new test_conv_transpose_1d());
+ test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 1, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 2, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 1, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1}));
test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {0, 2, 1, 3}));
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
for (ggml_type type_dst : all_types) {
test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
+ test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows
+ }
+ }
+ for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+ for (ggml_type type_dst : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+ test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {1, 0, 2, 3})); // cpy not-contiguous
}
}
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 10, 10, 10}, eps));
}
+#if 1
for (ggml_type type_a : base_types) {
for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
}
}
+#else
+ // m = a rows
+ // n = b rows
+ // k = cols
+ std::uniform_int_distribution<> dist_m(1, 128);
+ std::uniform_int_distribution<> dist_n(16, 128);
+ std::uniform_int_distribution<> dist_k(1, 16);
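+ // k is scaled by the block size below so quantized type_a rows always contain whole blocks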
+ for (int i = 0; i < 1000; i++) {
+ for (ggml_type type_a : all_types) {
+ for (ggml_type type_b : {GGML_TYPE_F32}) {
+ int m = dist_m(rng);
+ int n = dist_n(rng);
+ int k = dist_k(rng) * ggml_blck_size(type_a);
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, m, n, k, { 1, 1}, {1, 1}));
+ }
+ }
+ }
+#endif
for (ggml_type type_a : other_types) {
for (ggml_type type_b : {GGML_TYPE_F32}) {
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
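+ // for types whose block size is not 256, also test k equal to a single block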
+ if (ggml_blck_size(type_a) != 256) {
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
+ }
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
}
}
for (int n = 0; n < 10; ++n) {
int64_t ne0 = dist_ne0(rng);
int64_t ne1 = dist_ne1(rng);
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
}
exponent <<= 1;
}
}
}
-
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
return true;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
return false;
}