cuda : remove nrows_x in mul_mat_q_process_tile (llama/13325)

author R0CKSTAR <redacted>

Wed, 7 May 2025 07:48:23 +0000 (15:48 +0800)

committer Georgi Gerganov <redacted>

Wed, 7 May 2025 18:00:32 +0000 (21:00 +0300)
author R0CKSTAR <redacted>
Wed, 7 May 2025 07:48:23 +0000 (15:48 +0800)
committer Georgi Gerganov <redacted>
Wed, 7 May 2025 18:00:32 +0000 (21:00 +0300)
diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh

index b8143a7b23b39a445bda378dec47d46e636c5dc8..80baf459c15f2544d38ceb974cf719caeee3fd0f 100644 (file)
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -2522,7 +2522,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check, bool fixup>
  static __device__ __forceinline__ void mul_mat_q_process_tile(
          const char * __restrict__ x, const int offset_x, const int * __restrict__ y,
          const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup,
-        const int nrows_x, const int stride_row_x, const int ncols_y, const int stride_col_dst,
+        const int stride_row_x, const int ncols_y, const int stride_col_dst,
          const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) {
  
      constexpr int              qk         = ggml_cuda_type_traits<type>::qk;
@@ -2689,7 +2689,7 @@ static __global__ void mul_mat_q(
  
          constexpr bool fixup = false;
          mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
-            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
+            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
               tile_x_max_i, tile_y_max_j, 0, ncols_x/qk);
          return;
      }
@@ -2767,7 +2767,7 @@ static __global__ void mul_mat_q(
  
          constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer.
          mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
-            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
+            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
               tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
  
          kbc += blocks_per_ne00;
@@ -2834,7 +2834,7 @@ static __global__ void mul_mat_q(
  
      constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks.
      mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
-        (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
+        (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
           tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
  }
author	R0CKSTAR <redacted>
	Wed, 7 May 2025 07:48:23 +0000 (15:48 +0800)
committer	Georgi Gerganov <redacted>
	Wed, 7 May 2025 18:00:32 +0000 (21:00 +0300)