-DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT}
-DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG})
# htp-v68: configure and build the sources under ./htp as a separate external
# project, passing the shared ${HTP_CMAKE_ARGS} plus DSP_VERSION=v68 and
# PREBUILT_LIB_DIR "toolv19_v68". BUILD_ALWAYS ON re-runs the external build
# step on every build of the parent project.
+ExternalProject_Add(htp-v68
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
+ CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68")
+
# htp-v69: configure and build the sources under ./htp as a separate external
# project, passing the shared ${HTP_CMAKE_ARGS} plus DSP_VERSION=v69 and
# PREBUILT_LIB_DIR "toolv19_v69". BUILD_ALWAYS ON re-runs the external build
# step on every build of the parent project.
+ExternalProject_Add(htp-v69
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
+ CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69")
+
# htp-v73: configure and build the sources under ./htp as a separate external
# project, passing the shared ${HTP_CMAKE_ARGS} plus DSP_VERSION=v73 and
# PREBUILT_LIB_DIR "toolv19_v73". BUILD_ALWAYS ON re-runs the external build
# step on every build of the parent project.
ExternalProject_Add(htp-v73
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73")
# Install Hexagon skels required at runtime
install(FILES
+ ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so
+ ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so
#include <chrono>
#include <mutex>
#include <string>
+#include <stdexcept>
#ifdef _WIN32
# include <sal.h>
}
switch (arch_ver.capability & 0xff) {
+ case 0x68:
+ *arch = 68;
+ return 0;
+ case 0x69:
+ *arch = 69;
+ return 0;
case 0x73:
*arch = 73;
return 0;
desc->desctype = HEXAGON_UDMA_DESC_DESCTYPE_TYPE1;
desc->dstbypass = 1;
desc->srcbypass = 1;
+#if __HVX_ARCH__ >= 73
+ desc->dstbypass = 1;
+ desc->srcbypass = 1;
+#else
+ desc->dstbypass = 0;
+ desc->srcbypass = 1;
+#endif
desc->order = 0;
desc->dstate = HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE;
desc->src = (void *) src;
float fp32[VLEN_FP32];
} __attribute__((aligned(VLEN), packed)) HVX_VectorAlias;
+/* Q6_Vsf_equals_Vw is only available on v73+.*/
+#if __HVX_ARCH__ < 73
/*
 * Builds, for every 32-bit word lane of `in`, the packed word that
 * Q6_Vsf_equals_Vw() hands to Q6_Vsf_equals_Vqf32():
 *   - the input is shifted left by its per-lane normalization amount,
 *   - the upper 24 bits of the shifted value are kept (mask 0xFFFFFF00),
 *   - (0x7f + 30) - shift is added into the word,
 *   - lanes whose input equals zero produce an all-zero word.
 * NOTE(review): presumably this is the HVX qf32 layout (mantissa in the high
 * bits, biased exponent in the low byte) -- confirm against the HVX reference.
 */
+static inline HVX_Vector int32_to_qfloat(HVX_Vector const in)
+{
+ HVX_Vector const vzero = Q6_V_vzero();
+ HVX_VectorPred is_zero = Q6_Q_vcmp_eq_VwVw(in, vzero); /* lanes where in == 0 */
+ HVX_Vector lshift = Q6_Vw_vnormamt_Vw(in); /* per-lane normalization shift amount */
+ HVX_Vector normalized = Q6_Vw_vasl_VwVw(in, lshift);
+ HVX_Vector vexp = Q6_Vw_vsub_VwVw(Q6_V_vsplat_R(0x7f + 30), lshift); /* exponent shrinks as the shift grows */
+ HVX_Vector mant = Q6_V_vand_VV(Q6_V_vsplat_R(0xFFFFFF00), normalized); /* clear the low 8 bits */
+ HVX_Vector ret = Q6_V_vmux_QVV(is_zero, vzero, Q6_Vw_vadd_VwVw(mant, vexp)); /* zero lanes stay zero, others get mant + vexp */
+ return ret;
+}
+
/*
 * Fallback definition of Q6_Vsf_equals_Vw, compiled only inside the
 * enclosing `#if __HVX_ARCH__ < 73` guard. It packs the 32-bit word lanes
 * with int32_to_qfloat() and converts the result with Q6_Vsf_equals_Vqf32().
 */
+static inline HVX_Vector Q6_Vsf_equals_Vw(HVX_Vector const in)
+{
+ return Q6_Vsf_equals_Vqf32(int32_to_qfloat(in));
+}
+#endif
+
static inline HVX_Vector hvx_vec_splat_fp32(float i) {
union {
float f;
}
static int vtcm_acquire(struct htp_context * ctx) {
+ int err;
if (!ctx->vtcm_valid) {
// Temporarily bump thread priority to make sure it's higher than other sessions.
// This way the resource manager will notify the other thread to release VTCM.
// Note that we need to reacquire VTCM at normal priority for this to work next time.
qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio - 10);
- HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
+ err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
+ if (err != 0) {
+ FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
+ abort();
+ }
HAP_compute_res_release_cached(ctx->vtcm_rctx);
qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio);
- HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
+ err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
+ if (err != 0) {
+ FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
+ abort();
+ }
ctx->vtcm_valid = true;
}
HAP_compute_res_attr_init(&attr);
HAP_compute_res_attr_set_serialize(&attr, 0);
HAP_compute_res_attr_set_cache_mode(&attr, 1);
- HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, vtcm_size, vtcm_size);
+ HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, 0, vtcm_size);
HAP_compute_res_attr_set_release_callback(&attr, vtcm_release_callback, (void *) ctx);
HAP_compute_res_attr_set_hmx_param(&attr, 1);