From: Xuan-Son Nguyen
Date: Thu, 18 Dec 2025 23:18:01 +0000 (+0100)
Subject: model : add ASR support for LFM2-Audio-1.5B (conformer) (llama/18106)
X-Git-Tag: v0.9.5~56
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=cdb1e3f3c079faf18caa944d7a996fe99ef758e3;p=pkg%2Fggml%2Fsources%2Fggml

model : add ASR support for LFM2-Audio-1.5B (conformer) (llama/18106)

* ASR with LFM2-Audio-1.5B

* Set rope_theta

* Fix comment

* Remove rope_theta setting

* Address PR feedback

* rename functions to conformer

* remove some redundant ggml_cont

* fix missing tensor

* add prefix "a." for conv tensors

* remove redundant reshape

* clean up

* add test model

---------

Co-authored-by: Tarek Dakhran
---

diff --git a/src/ggml-cuda/ssm-conv.cu b/src/ggml-cuda/ssm-conv.cu
index 41979733..6d5ea704 100644
--- a/src/ggml-cuda/ssm-conv.cu
+++ b/src/ggml-cuda/ssm-conv.cu
@@ -102,31 +102,25 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
     const int threads = 128;
     GGML_ASSERT(nr % threads == 0);
 
-    if (n_t <= 32) {
-        const dim3 blocks(n_s, (nr + threads - 1) / threads, 1);
-        if (nc == 4) {
-            ssm_conv_f32<threads, 4><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
-                                                                     dst, dst_nb0, dst_nb1, dst_nb2, n_t);
-        } else if (nc == 3) {
-            ssm_conv_f32<threads, 3><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
-                                                                     dst, dst_nb0, dst_nb1, dst_nb2, n_t);
+    auto launch_kernel = [&](auto NC) {
+        constexpr int kNC = decltype(NC)::value;
+        if (n_t <= 32) {
+            const dim3 blocks(n_s, (nr + threads - 1) / threads, 1);
+            ssm_conv_f32<threads, kNC><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
+                                                                       dst, dst_nb0, dst_nb1, dst_nb2, n_t);
         } else {
-            GGML_ABORT("Only support kernel size = 3 or size = 4 right now.");
-        }
-    } else {
-        if (nc == 4) {
-            const int64_t split_n_t = 32;
-            dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
-            ssm_conv_long_token_f32<threads, 4, split_n_t><<<blocks, threads, 0, stream>>>(
-                src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
-        } else if (nc == 3) {
             const int64_t split_n_t = 32;
             dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
-            ssm_conv_long_token_f32<threads, 3, split_n_t><<<blocks, threads, 0, stream>>>(
+            ssm_conv_long_token_f32<threads, kNC, split_n_t><<<blocks, threads, 0, stream>>>(
                 src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
-        } else {
-            GGML_ABORT("Only support kernel size = 3 or size = 4 right now.");
         }
+    };
+
+    switch (nc) {
+        case 3: launch_kernel(std::integral_constant<int, 3>{}); break;
+        case 4: launch_kernel(std::integral_constant<int, 4>{}); break;
+        case 9: launch_kernel(std::integral_constant<int, 9>{}); break;
+        default: GGML_ABORT("Only support kernel sizes 3, 4, 9 right now.");
     }
 }
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 416218b5..53c172fd 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -7295,11 +7295,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f));
 
-    for (int64_t d_conv : {3, 4}) {
+    for (int64_t d_conv : {3, 4, 9}) {
         for (int64_t d_inner: {1024, 1536, 2048}) {
-            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}));
-            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {8, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}));
-            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, d_inner, 4, 1}, {d_conv, d_inner, 1, 1}));
+            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}));
+            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {2 * d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}));
+            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {d_conv, d_inner, 4, 1}, {d_conv, d_inner, 1, 1}));
         }
     }
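
For reference, the launch_kernel refactor in ssm-conv.cu above relies on the standard C++ tag-dispatch idiom: the runtime kernel size nc is switched over once, and each case passes a std::integral_constant so that the generic lambda can recover the size as a compile-time constant and use it as a template argument for the kernel. A minimal, self-contained sketch of the idiom (dispatch and run_kernel are illustrative names, not part of the patch):

    #include <cstdio>
    #include <type_traits>

    // Stand-in for a templated kernel launch such as ssm_conv_f32<..., kNC><<<...>>>.
    template <int KC>
    static void run_kernel() {
        std::printf("instantiated with KC = %d\n", KC);
    }

    static void dispatch(int nc) {
        // Generic lambda: each std::integral_constant argument instantiates
        // the body with a different compile-time constant.
        auto launch = [](auto NC) {
            constexpr int kNC = decltype(NC)::value;
            run_kernel<kNC>();
        };
        switch (nc) {
            case 3: launch(std::integral_constant<int, 3>{}); break;
            case 4: launch(std::integral_constant<int, 4>{}); break;
            case 9: launch(std::integral_constant<int, 9>{}); break;
            default: std::printf("unsupported kernel size %d\n", nc);
        }
    }

    int main() {
        dispatch(9); // prints: instantiated with KC = 9
    }

Writing the branch bodies once in the lambda removes the per-size if/else duplication of the old code, so supporting the conformer's kernel size 9 becomes a one-line case instead of another copy of the launch logic.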