GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
GGML_ASSERT(src0->type == dst->type);
- const size_t nb00 = src0->nb[0];
- const size_t nb0 = dst->nb[0];
+ const size_t nb0 = ggml_type_size(src0->type);
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
if (ie0 < ie1) {
memcpy(
((char *) dst->data + ie0*nb0),
- ((char *) src0->data + ie0*nb00),
- (ie1 - ie0) * ggml_type_size(src0->type));
+ ((char *) src0->data + ie0*nb0),
+ (ie1 - ie0) * nb0);
}
}
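The change in this first hunk: the same-cont copy helper previously read its `dst` offset stride from `dst->nb[0]`, its `src0` offset stride from `src0->nb[0]`, and sized the `memcpy` with a separate `ggml_type_size()` call. Both tensors are asserted contiguous and same-typed, and `ggml_is_contiguous` guarantees `nb[0] == ggml_type_size(type)`, so all three quantities are the same value; a single `nb0 = ggml_type_size(src0->type)` now serves for both offsets and the byte count. The context elided between the thread indices and the `memcpy` partitions the flattened element range across threads; below is a minimal sketch of that partitioning, using the `ne`/`dr`/`ie0`/`ie1` names the hunk's `if (ie0 < ie1)` implies (not the verbatim context lines):

// sketch: split ggml_nelements(dst) elements into nth chunks;
// thread ith copies the half-open element range [ie0, ie1)
const int ne  = ggml_nelements(dst);   // total elements to copy
const int dr  = (ne + nth - 1) / nth;  // elements per thread, rounded up
const int ie0 = dr * ith;              // first element for this thread
const int ie1 = MIN(ie0 + dr, ne);     // one past the last element

The next three hunks apply one identical deletion to three sibling dup kernels: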
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, dst);
- return;
- }
-
// parallelize by rows
const int nr = ne01;
// number of rows per thread
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, dst);
- return;
- }
-
// parallelize by rows
const int nr = ne01;
// number of rows per thread
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, dst);
- return;
- }
-
// parallelize by rows
const int nr = ne01;
// number of rows per thread
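The deleted block is the same in all three hunks; the repeated `ith`/`nth` and "parallelize by rows" context indicates the three type-specific dup kernels (presumably the F32, F16, and BF16 paths). The early-out is dead weight there: same-type copies are steered to the byte-wise kernel before any per-type code runs, so `src0->type == dst->type` can never hold inside these functions. A sketch of that top-level dispatch, assumed to follow ggml's usual shape (abbreviated, not the verbatim function):

// assumed dispatch: same-type copies go straight to the byte-wise kernel,
// so the per-type kernels only ever perform converting copies
static void ggml_compute_forward_dup(
        const struct ggml_compute_params * params,
        struct ggml_tensor * dst) {
    const struct ggml_tensor * src0 = dst->src[0];

    if (src0->type == dst->type) {
        ggml_compute_forward_dup_bytes(params, dst); // raw byte copy
        return;
    }

    switch (src0->type) {
        case GGML_TYPE_F16:  ggml_compute_forward_dup_f16 (params, dst); break;
        case GGML_TYPE_BF16: ggml_compute_forward_dup_bf16(params, dst); break;
        case GGML_TYPE_F32:  ggml_compute_forward_dup_f32 (params, dst); break;
        default: GGML_ASSERT(false);
    }
}

The last kernel hunk reorders `ggml_compute_forward_dup_bytes` itself: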
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
GGML_ASSERT(src0->type == dst->type);
+ GGML_TENSOR_UNARY_OP_LOCALS;
+
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
ggml_compute_forward_dup_same_cont(params, dst);
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
-
const size_t type_size = ggml_type_size(src0->type);
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
}
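Here `GGML_TENSOR_UNARY_OP_LOCALS` moves above the same-cont early return, so the per-dimension locals are declared on every path through the function and all declarations stay at the top, in the style of the surrounding kernels. The macro expands to size and stride locals for `src0` and `dst`; abbreviated from ggml's headers (each expanded variable is wrapped in `GGML_UNUSED`, so an early return does not provoke unused-variable warnings):

// GGML_TENSOR_UNARY_OP_LOCALS, abbreviated: each line declares four locals
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) // ne00, ne01, ne02, ne03
GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) // nb00, nb01, nb02, nb03
GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) // ne0,  ne1,  ne2,  ne3
GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb) // nb0,  nb1,  nb2,  nb3

The remaining hunk adds `GGML_OP_CONT` coverage to the backend test list: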
test_cases.emplace_back(new test_cont());
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1, 1}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3, 5}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5, 7}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1, 1}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3, 5}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5, 7}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1, 1}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3, 5}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5, 7}));
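The new cases sweep `ggml_cont` over F32, F16, and BF16 at several shapes, including rows of only two elements, which lands squarely on the byte-wise dup path patched above. Each `test_cont` case builds a non-contiguous view and forces a contiguous copy; here is a minimal standalone sketch of the same pattern against the public ggml API (the graph helpers are assumed from a recent ggml; this is illustrative, not the harness code):

// standalone sketch: a transpose yields a non-contiguous view, and
// ggml_cont() emits GGML_OP_CONT, exercising the dup kernels above
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
    struct ggml_tensor * t = ggml_transpose(ctx, a); // view, not contiguous
    struct ggml_tensor * c = ggml_cont(ctx, t);      // forces the copy

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/2);

    ggml_free(ctx);
    return 0;
}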
auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
for (auto op : {ggml_add, ggml_mul, ggml_div}) {