model : support LiquidAI LFM2 hybrid family (llama/14620)

author Tarek Dakhran <redacted>

Fri, 11 Jul 2025 18:27:01 +0000 (20:27 +0200)

committer Georgi Gerganov <redacted>

Sat, 12 Jul 2025 13:05:00 +0000 (16:05 +0300)
author Tarek Dakhran <redacted>
Fri, 11 Jul 2025 18:27:01 +0000 (20:27 +0200)
committer Georgi Gerganov <redacted>
Sat, 12 Jul 2025 13:05:00 +0000 (16:05 +0300)
diff --git a/src/ggml-cuda/ssm-conv.cu b/src/ggml-cuda/ssm-conv.cu

index f637571963730511a902f13066a9a721851504dc..41979733601d27a6bba66ddd3bc1f78a8fde0396 100644 (file)
--- a/src/ggml-cuda/ssm-conv.cu
+++ b/src/ggml-cuda/ssm-conv.cu
@@ -107,8 +107,11 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
          if (nc == 4) {
              ssm_conv_f32<threads, 4><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
                                                                       dst, dst_nb0, dst_nb1, dst_nb2, n_t);
+        } else if (nc == 3) {
+            ssm_conv_f32<threads, 3><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
+                                                                     dst, dst_nb0, dst_nb1, dst_nb2, n_t);
          } else {
-            GGML_ABORT("Only support kernel size = 4  now.");
+            GGML_ABORT("Only support kernel size = 3 or size = 4 right now.");
          }
      } else {
          if (nc == 4) {
@@ -116,8 +119,13 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
              dim3          blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
              ssm_conv_long_token_f32<threads, 4, split_n_t><<<blocks, threads, 0, stream>>>(
                  src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
+        } else if (nc == 3) {
+            const int64_t split_n_t = 32;
+            dim3          blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t);
+            ssm_conv_long_token_f32<threads, 3, split_n_t><<<blocks, threads, 0, stream>>>(
+                src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
          } else {
-            GGML_ABORT("Only support kernel size = 4 right now.");
+            GGML_ABORT("Only support kernel size = 3 or size = 4 right now.");
          }
      }
  }
author	Tarek Dakhran <redacted>
	Fri, 11 Jul 2025 18:27:01 +0000 (20:27 +0200)
committer	Georgi Gerganov <redacted>
	Sat, 12 Jul 2025 13:05:00 +0000 (16:05 +0300)