// added.
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
- int64_t acl_storage_len = 0;
if (ne == nullptr) {
- acl_storage_len = ggml_nbytes(tensor);
for (int i = 0; i < GGML_MAX_DIMS; i++) {
acl_ne[i] = tensor->ne[i];
// The step size of acl is in elements.
acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
}
} else {
// With bcast
for (int i = 0; i < dims; i++) {
- acl_storage_len += (ne[i] - 1) * nb[i];
acl_ne[i] = ne[i];
acl_stride[i] = nb[i] / ggml_element_size(tensor);
}
}
- // Reverse ne and stride.
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
+ int64_t acl_storage_len = 1;
+ for (int i = 0; i < final_dims; i++) {
+ acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
+ }
+
+ // Reverse ne and stride.
std::reverse(acl_ne, acl_ne + final_dims);
std::reverse(acl_stride, acl_stride + final_dims);
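The new acl_storage_len is one plus the element offset of the last addressable element of the view, so it also stays correct for broadcast dimensions (stride 0). A standalone sketch of that computation, with made-up shapes and strides and a hypothetical helper name (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Element offset of the last addressable element of a strided view, plus one.
    static int64_t view_storage_len(const int64_t* ne, const int64_t* stride, int dims) {
        int64_t len = 1;
        for (int i = 0; i < dims; i++) {
            len += (ne[i] - 1) * stride[i];
        }
        return len;
    }

    int main() {
        // A 4 x 1 x 2 view whose middle dimension is broadcast (stride 0).
        const int64_t ne[]     = {4, 1, 2};
        const int64_t stride[] = {1, 0, 4};
        std::printf("%lld\n", (long long) view_storage_len(ne, stride, 3));  // prints 8
        return 0;
    }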
tmp_stride[i] = nb[i] / type_size;
}
- std::reverse(tmp_ne, tmp_ne + dims);
- std::reverse(tmp_stride, tmp_stride + dims);
-
- int64_t acl_storage_len = 0;
+ int64_t acl_storage_len = 1;
for (int i = 0; i < dims; i++) {
- acl_storage_len += (ne[i] - 1) * nb[i];
+ acl_storage_len += (tmp_ne[i] - 1) * tmp_stride[i];
}
+ std::reverse(tmp_ne, tmp_ne + dims);
+ std::reverse(tmp_stride, tmp_stride + dims);
+
aclTensor* acl_tensor =
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
format, &acl_storage_len, 1, data_ptr);
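Both overloads pass strides to aclCreateTensor in elements, which is why the byte strides from nb are divided by the element/type size first. A small illustration with made-up values, assuming a contiguous f32 tensor (not taken from the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // A contiguous 4 x 3 float tensor: ggml keeps byte strides in nb[],
        // so nb[0] = sizeof(float) and nb[1] = nb[0] * ne[0].
        const int64_t ne[] = {4, 3};
        const size_t  nb[] = {sizeof(float), sizeof(float) * 4};
        const size_t  type_size = sizeof(float);

        for (int i = 0; i < 2; i++) {
            std::printf("dim %d: ne=%lld stride=%zu elements\n",
                        i, (long long) ne[i], nb[i] / type_size);
        }
        return 0;
    }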
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ggml_tensor* src = dst->src[0];
- GGML_ASSERT(src->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F32);
float min;
float max;
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
- GGML_ASSERT(eps > 0.0f);
-
uint64_t workspaceSize = 0;
aclOpExecutor* executor;
void* workspaceAddr = nullptr;
// TODO: use ascendc
// Only tested with the LLAMA model.
ggml_tensor* src0 = dst->src[0]; // input
- ggml_tensor* src2 = dst->src[2]; // freq_factors
+ // ggml_tensor* src2 = dst->src[2];  // freq_factors, not used for now.
// param
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
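These rope parameters are stored bit-for-bit in ggml's int32_t-backed op_params buffer and read back with memcpy; the supports_op change further down switches the ext_factor read to the same pattern. A minimal sketch, with the buffer size and parameter index chosen purely for illustration:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        // op_params is a raw int32_t buffer; float parameters live in it by
        // bit pattern. 16 words and index 7 are illustrative values here.
        int32_t op_params[16] = {0};

        const float ext_factor_in = 0.5f;
        std::memcpy(&op_params[7], &ext_factor_in, sizeof(float));

        // Read back with memcpy instead of a pointer cast, avoiding a
        // strict-aliasing violation.
        float ext_factor = 0.0f;
        std::memcpy(&ext_factor, &op_params[7], sizeof(float));
        std::printf("%f\n", ext_factor);  // 0.500000
        return 0;
    }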
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ggml_tensor* src = dst->src[0];
- GGML_ASSERT(src->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
aclTensor* acl_src = ggml_cann_create_tensor(src);
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ggml_tensor* src = dst->src[0];
- GGML_ASSERT(src->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
aclTensor* acl_src = ggml_cann_create_tensor(src);
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
ACL_CHECK(aclrtSynchronizeDevice());
ACL_CHECK(aclrtResetDevice(cann_ctx->device));
- // finalize when last backend freed.
- if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) {
- ACL_CHECK(aclFinalize());
- }
-
delete cann_ctx;
delete backend;
}
}
case GGML_OP_MUL_MAT: {
switch (op->src[0]->type) {
- case GGML_TYPE_Q8_0:
case GGML_TYPE_F16:
case GGML_TYPE_F32:
- case GGML_TYPE_Q4_0:
return true;
+ case GGML_TYPE_Q8_0:
+ case GGML_TYPE_Q4_0:
+ // Only contiguous tensors are supported for quantized types.
+ return ggml_is_contiguous(op->src[0]) &&
+ ggml_is_contiguous(op->src[1]);
default:
return false;
}
}
case GGML_OP_ROPE: {
// TODO: with ops-test v == 1
- float * ext_factor = (float*)((int32_t*)op->op_params + 7);
+ float ext_factor = 0.0f;
+ memcpy(&ext_factor, (const float *) op->op_params + 7, sizeof(float));
// TODO: n_dims <= ne0
if (op->src[0]->ne[0] != op->op_params[1]) {
return false;
}
// TODO: ext_factor != 0
- if (*ext_factor != 0) {
+ if (ext_factor != 0) {
return false;
}
}
return true;
}
+ case GGML_OP_POOL_2D: {
+ const int32_t * opts = (const int32_t *) op->op_params;
+ const int k0 = opts[1];
+ const int k1 = opts[2];
+ const int p0 = opts[5];
+ const int p1 = opts[6];
+ // value of paddingH should be at most half of kernelH
+ // value of paddingW should be at most half of kernelW
+ return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
+ }
case GGML_OP_DUP:
case GGML_OP_IM2COL:
case GGML_OP_CONCAT:
case GGML_OP_CLAMP:
case GGML_OP_DIAG_MASK_INF:
case GGML_OP_SOFT_MAX:
- case GGML_OP_POOL_2D:
case GGML_OP_SUM_ROWS:
case GGML_OP_ARGSORT:
case GGML_OP_ACC: