musa: add GGML_UNUSED_VARS (llama/15446)

author R0CKSTAR <redacted>

Thu, 21 Aug 2025 03:06:05 +0000 (11:06 +0800)

committer Georgi Gerganov <redacted>

Fri, 5 Sep 2025 09:54:00 +0000 (12:54 +0300)
author R0CKSTAR <redacted>
Thu, 21 Aug 2025 03:06:05 +0000 (11:06 +0800)
committer Georgi Gerganov <redacted>
Fri, 5 Sep 2025 09:54:00 +0000 (12:54 +0300)
diff --git a/include/ggml.h b/include/ggml.h

index da8813fd278928cf362cade02acd77adc6c8e825..b8b82e11c86f5275a2ac6b634a416cf72266096e 100644 (file)
--- a/include/ggml.h
+++ b/include/ggml.h
@@ -244,6 +244,13 @@
  #define GGML_MROPE_SECTIONS   4
  
  #define GGML_UNUSED(x) (void)(x)
+#ifdef __CUDACC__
+template<typename... Args>
+__host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexcept {}
+#define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__)
+#else
+#define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
+#endif // __CUDACC__
  
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
  
diff --git a/src/ggml-cuda/conv-transpose-1d.cu b/src/ggml-cuda/conv-transpose-1d.cu

index fe4caf674d4d96ca747294287ec9776e98fbabd5..8418ba667318b138d61bd5ee2807f062f553dbbd 100644 (file)
--- a/src/ggml-cuda/conv-transpose-1d.cu
+++ b/src/ggml-cuda/conv-transpose-1d.cu
@@ -34,10 +34,7 @@ static  __global__ void conv_transpose_1d_kernel(
          }
      }
      dst[global_index] = accumulator;
-    GGML_UNUSED(p0); GGML_UNUSED(d0); GGML_UNUSED(src0_ne3);
-    GGML_UNUSED(src1_ne3); GGML_UNUSED(dst_ne3);
-    GGML_UNUSED(src1_ne1); GGML_UNUSED(dst_ne1);
-    GGML_UNUSED(src1_ne2); GGML_UNUSED(dst_ne2);
+    GGML_UNUSED_VARS(p0, d0, src0_ne3, src1_ne3, dst_ne3, src1_ne1, dst_ne1, src1_ne2, dst_ne2);
  }
  
  static void conv_transpose_1d_f32_f32_cuda(
diff --git a/src/ggml-cuda/convert.cu b/src/ggml-cuda/convert.cu

index 7a8b6fdf5f493f73bb1573486775fc685bdd1cd8..ba3d4eeb88085492eae763e965d624d82e246ed9 100644 (file)
--- a/src/ggml-cuda/convert.cu
+++ b/src/ggml-cuda/convert.cu
@@ -71,9 +71,7 @@ static __global__ void dequantize_block_q8_0_f16(const void * __restrict__ vx, h
          y2[iy/2 + threadIdx.x] = __hmul2(make_half2(qs.x, qs.y), __half2half2(d));
      }
  #else
-    GGML_UNUSED(vx);
-    GGML_UNUSED(y);
-    GGML_UNUSED(k);
+    GGML_UNUSED_VARS(vx, y, k);
      NO_DEVICE_CODE;
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
  }
diff --git a/src/ggml-cuda/cpy.cu b/src/ggml-cuda/cpy.cu

index 0380784ab49186c6783e95c616bde9f820356ff5..c40db08cedb05e27af6f50fd8df9c1faa64decad 100644 (file)
--- a/src/ggml-cuda/cpy.cu
+++ b/src/ggml-cuda/cpy.cu
@@ -134,8 +134,7 @@ void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_des
      CUDA_CHECK(cudaMemcpyAsync(cuda_graph->dest_ptrs_d, host_dest_ptrs, host_dest_ptrs_size*sizeof(char *), cudaMemcpyHostToDevice, stream));
      cuda_graph->graph_cpynode_index = 0; // reset index
  #else
-    GGML_UNUSED(cuda_graph); GGML_UNUSED(host_dest_ptrs);
-    GGML_UNUSED(host_dest_ptrs_size); GGML_UNUSED(stream);
+    GGML_UNUSED_VARS(cuda_graph, host_dest_ptrs, host_dest_ptrs_size, stream);
  #endif
  }
  
diff --git a/src/ggml-cuda/fattn-mma-f16.cuh b/src/ggml-cuda/fattn-mma-f16.cuh

index 1d7e0b037e756fc18ba8fe47c60e0baf98fc910c..57defb0c629d6984f2066d29d00b3a850a06c2da 100644 (file)
--- a/src/ggml-cuda/fattn-mma-f16.cuh
+++ b/src/ggml-cuda/fattn-mma-f16.cuh
@@ -767,14 +767,11 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
          }
      }
  #else
-    GGML_UNUSED(Q_f2); GGML_UNUSED(K_h2); GGML_UNUSED(V_h2);
-    GGML_UNUSED(mask_h2); GGML_UNUSED(dstk); GGML_UNUSED(dstk_fixup);
-    GGML_UNUSED(scale); GGML_UNUSED(slope); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(stride_K); GGML_UNUSED(stride_V);
-    GGML_UNUSED(stride_mask); GGML_UNUSED(tile_K);
-    GGML_UNUSED(tile_V); GGML_UNUSED(tile_mask); GGML_UNUSED(Q_B);
-    GGML_UNUSED(VKQ_C); GGML_UNUSED(KQ_max); GGML_UNUSED(KQ_rowsum);
-    GGML_UNUSED(kb0); GGML_UNUSED(tile_Q);
+    GGML_UNUSED_VARS(Q_f2, K_h2, V_h2, mask_h2, dstk, dstk_fixup,
+        scale, slope, logit_softcap, ne01, ne02,
+        stride_K, stride_V, stride_mask,
+        tile_Q, tile_K, tile_V, tile_mask,
+        Q_B, VKQ_C, KQ_max, KQ_rowsum, kb0);
      NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
  }
@@ -1236,14 +1233,10 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
          }
      }
  #else
-    GGML_UNUSED(Q_f2); GGML_UNUSED(K_h2); GGML_UNUSED(V_h2);
-    GGML_UNUSED(mask_h2); GGML_UNUSED(sinks_f);
-    GGML_UNUSED(dstk); GGML_UNUSED(dstk_fixup);
-    GGML_UNUSED(scale); GGML_UNUSED(slope); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne01); GGML_UNUSED(ne02);
-    GGML_UNUSED(stride_Q1); GGML_UNUSED(stride_Q2);
-    GGML_UNUSED(stride_K); GGML_UNUSED(stride_V); GGML_UNUSED(stride_mask);
-    GGML_UNUSED(jt); GGML_UNUSED(kb0_start); GGML_UNUSED(kb0_stop);
+    GGML_UNUSED_VARS(Q_f2, K_h2, V_h2, mask_h2, sinks_f, dstk, dstk_fixup,
+        scale, slope, logit_softcap, ne01, ne02,
+        stride_Q1, stride_Q2, stride_K, stride_V, stride_mask,
+        jt, kb0_start, kb0_stop);
      NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
  }
@@ -1397,17 +1390,15 @@ static __global__ void flash_attn_ext_f16(
          (Q_f2, K_h2, V_h2, mask_h2, sinks_f, dstk, dst_meta, scale, slope, logit_softcap,
           ne01, ne02, stride_Q1, stride_Q2, stride_K, stride_V, stride_mask, jt, kb0_start_kernel, kb0_stop_kernel);
  #else
-    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask);
-    GGML_UNUSED(sinks); GGML_UNUSED(KV_max); GGML_UNUSED(dst); GGML_UNUSED(dst_meta);
-    GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1);
-    GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03);
-    GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03);
-    GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13);
-    GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13);
-    GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23);
-    GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33);
-    GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33);
+    GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+        max_bias, m0, m1, n_head_log2, logit_softcap,
+        ne00, ne01, ne02, ne03,
+              nb01, nb02, nb03,
+        ne10, ne11, ne12, ne13,
+              nb11, nb12, nb13,
+              nb21, nb22, nb23,
+              ne31, ne32, ne33,
+              nb31, nb32, nb33);
      NO_DEVICE_CODE;
  #endif // defined(FLASH_ATTN_AVAILABLE) && defined(TURING_MMA_AVAILABLE)
  }
diff --git a/src/ggml-cuda/fattn-tile-f16.cu b/src/ggml-cuda/fattn-tile-f16.cu

index 4111bcc04f75845250f324194d356e71a24218cd..6239d184d0a67baeae0d43870b0d478c85003070 100644 (file)
--- a/src/ggml-cuda/fattn-tile-f16.cu
+++ b/src/ggml-cuda/fattn-tile-f16.cu
@@ -299,17 +299,15 @@ static __global__ void flash_attn_tile_ext_f16(
          }
      }
  #else
-    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask);
-    GGML_UNUSED(sinks); GGML_UNUSED(KV_max); GGML_UNUSED(dst); GGML_UNUSED(dst_meta);
-    GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1);
-    GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03);
-    GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03);
-    GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13);
-    GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13);
-    GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23);
-    GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33);
-    GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33);
+    GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+        max_bias, m0, m1, n_head_log2, logit_softcap,
+        ne00, ne01, ne02, ne03,
+              nb01, nb02, nb03,
+        ne10, ne11, ne12, ne13,
+              nb11, nb12, nb13,
+              nb21, nb22, nb23,
+              ne31, ne32, ne33,
+              nb31, nb32, nb33);
      NO_DEVICE_CODE;
  #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
  }
diff --git a/src/ggml-cuda/fattn-tile-f32.cu b/src/ggml-cuda/fattn-tile-f32.cu

index 1c1dc725d28f50ab293630449991f13dfee3f8c0..b96a9ef97197098ef5507a954e57858ee7e2883f 100644 (file)
--- a/src/ggml-cuda/fattn-tile-f32.cu
+++ b/src/ggml-cuda/fattn-tile-f32.cu
@@ -38,6 +38,15 @@ static __global__ void flash_attn_tile_ext_f32(
      return;
  #endif // FP16_MMA_AVAILABLE
      if (use_logit_softcap && !(D == 128 || D == 256)) {
+        GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+            max_bias, m0, m1, n_head_log2, logit_softcap,
+            ne00, ne01, ne02, ne03,
+                  nb01, nb02, nb03,
+            ne10, ne11, ne12, ne13,
+                  nb11, nb12, nb13,
+                  nb21, nb22, nb23,
+                  ne31, ne32, ne33,
+                  nb31, nb32, nb33);
          NO_DEVICE_CODE;
          return;
      }
@@ -301,17 +310,15 @@ static __global__ void flash_attn_tile_ext_f32(
          }
      }
  #else
-    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask);
-    GGML_UNUSED(sinks); GGML_UNUSED(KV_max); GGML_UNUSED(dst); GGML_UNUSED(dst_meta);
-    GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1);
-    GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03);
-    GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03);
-    GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13);
-    GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13);
-    GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23);
-    GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33);
-    GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33);
+    GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+        max_bias, m0, m1, n_head_log2, logit_softcap,
+        ne00, ne01, ne02, ne03,
+              nb01, nb02, nb03,
+        ne10, ne11, ne12, ne13,
+              nb11, nb12, nb13,
+              nb21, nb22, nb23,
+              ne31, ne32, ne33,
+              nb31, nb32, nb33);
      NO_DEVICE_CODE;
  #endif // FLASH_ATTN_AVAILABLE
  }
diff --git a/src/ggml-cuda/fattn-vec-f16.cuh b/src/ggml-cuda/fattn-vec-f16.cuh

index 2131b5feeff3cd95485b5ac9106fe691032fb24a..27a2dd6ae448fe8b2a4d5dd8161559a3c1daa8c6 100644 (file)
--- a/src/ggml-cuda/fattn-vec-f16.cuh
+++ b/src/ggml-cuda/fattn-vec-f16.cuh
@@ -349,17 +349,15 @@ static __global__ void flash_attn_vec_ext_f16(
          dst_meta[((sequence*ne01 + ic0 + tid)*ne02 + head)*gridDim.y + blockIdx.y] = make_float2(kqmax[tid], kqsum[tid]);
      }
  #else
-    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask);
-    GGML_UNUSED(sinks); GGML_UNUSED(KV_max); GGML_UNUSED(dst); GGML_UNUSED(dst_meta);
-    GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1);
-    GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03);
-    GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03);
-    GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13);
-    GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13);
-    GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23);
-    GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33);
-    GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33);
+    GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+        max_bias, m0, m1, n_head_log2, logit_softcap,
+        ne00, ne01, ne02, ne03,
+              nb01, nb02, nb03,
+        ne10, ne11, ne12, ne13,
+              nb11, nb12, nb13,
+              nb21, nb22, nb23,
+              ne31, ne32, ne33,
+              nb31, nb32, nb33);
      NO_DEVICE_CODE;
  #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
  }
diff --git a/src/ggml-cuda/fattn-vec-f32.cuh b/src/ggml-cuda/fattn-vec-f32.cuh

index a06fba6cde6c8151063fc606164a393e1442b5ac..da195d0334d7770d08812e5db398bb03cf8094f8 100644 (file)
--- a/src/ggml-cuda/fattn-vec-f32.cuh
+++ b/src/ggml-cuda/fattn-vec-f32.cuh
@@ -37,6 +37,15 @@ static __global__ void flash_attn_vec_ext_f32(
  
      // Skip unused kernel variants for faster compilation:
      if (use_logit_softcap && !(D == 128 || D == 256)) {
+        GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+            max_bias, m0, m1, n_head_log2, logit_softcap,
+            ne00, ne01, ne02, ne03,
+                  nb01, nb02, nb03,
+            ne10, ne11, ne12, ne13,
+                  nb11, nb12, nb13,
+                  nb21, nb22, nb23,
+                  ne31, ne32, ne33,
+                  nb31, nb32, nb33);
          NO_DEVICE_CODE;
          return;
      }
@@ -334,17 +343,15 @@ static __global__ void flash_attn_vec_ext_f32(
          dst_meta[((sequence*ne01 + ic0 + tid)*ne02 + head)*gridDim.y + blockIdx.y] = make_float2(kqmax[tid], kqsum[tid]);
      }
  #else
-    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask);
-    GGML_UNUSED(sinks); GGML_UNUSED(KV_max); GGML_UNUSED(dst); GGML_UNUSED(dst_meta);
-    GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1);
-    GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03);
-    GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13);
-    GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33);
-    GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33);
-    GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03);
-    GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13);
-    GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23);
+    GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+        max_bias, m0, m1, n_head_log2, logit_softcap,
+        ne00, ne01, ne02, ne03,
+              nb01, nb02, nb03,
+        ne10, ne11, ne12, ne13,
+              nb11, nb12, nb13,
+              nb21, nb22, nb23,
+              ne31, ne32, ne33,
+              nb31, nb32, nb33);
      NO_DEVICE_CODE;
  #endif // FLASH_ATTN_AVAILABLE
  }
diff --git a/src/ggml-cuda/fattn-wmma-f16.cu b/src/ggml-cuda/fattn-wmma-f16.cu

index 2e2de8a0935136a0fca38721f7d6eb07b4d8d37b..2219191fd91524ea5a638d0f4796e146adb269e2 100644 (file)
--- a/src/ggml-cuda/fattn-wmma-f16.cu
+++ b/src/ggml-cuda/fattn-wmma-f16.cu
@@ -471,16 +471,15 @@ static __global__ void flash_attn_ext_f16(
          dst_meta[j_dst_unrolled] = dst_meta_val;
      }
  #else
-    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask);
-    GGML_UNUSED(sinks); GGML_UNUSED(KV_max); GGML_UNUSED(dst); GGML_UNUSED(dst_meta);
-    GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1);
-    GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap);
-    GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03);
-    GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13);
-    GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33); GGML_UNUSED(nb31);
-    GGML_UNUSED(nb32); GGML_UNUSED(nb33); GGML_UNUSED(nb01); GGML_UNUSED(nb02);
-    GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13);
-    GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23);
+    GGML_UNUSED_VARS(Q, K, V, mask, sinks, KV_max, dst, dst_meta, scale,
+        max_bias, m0, m1, n_head_log2, logit_softcap,
+        ne00, ne01, ne02, ne03,
+              nb01, nb02, nb03,
+        ne10, ne11, ne12, ne13,
+              nb11, nb12, nb13,
+              nb21, nb22, nb23,
+              ne31, ne32, ne33,
+              nb31, nb32, nb33);
      NO_DEVICE_CODE;
  #endif // defined(FLASH_ATTN_AVAILABLE) && (__CUDA_ARCH__ == GGML_CUDA_CC_VOLTA || (defined(GGML_HIP_ROCWMMA_FATTN) && defined(FP16_MMA_AVAILABLE)))
  }
diff --git a/src/ggml-cuda/ggml-cuda.cu b/src/ggml-cuda/ggml-cuda.cu

index 1440f2f2e94755bdf417e43284c8d437f5b6fb47..4e17fd211e1bba40a83d8fde3acfb326eafbfe30 100644 (file)
--- a/src/ggml-cuda/ggml-cuda.cu
+++ b/src/ggml-cuda/ggml-cuda.cu
@@ -1328,9 +1328,7 @@ static void ggml_cuda_op_mul_mat_cublas(
                      &beta,  dst_dd_i,    ldc));
      }
  
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_ddq_i);
-    GGML_UNUSED(src1_padded_row_size);
+    GGML_UNUSED_VARS(dst, src1_ddq_i, src1_padded_row_size);
  }
  
  static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
diff --git a/src/ggml-cuda/mma.cuh b/src/ggml-cuda/mma.cuh

index 83ee16b27d0df448483cb04d2ef8328b3a367e87..667deb9c650175e83ad6c27c15f646743a450557 100644 (file)
--- a/src/ggml-cuda/mma.cuh
+++ b/src/ggml-cuda/mma.cuh
@@ -291,9 +291,7 @@ namespace ggml_cuda_mma {
              : "=r"(xi[0]), "=r"(xi[2]), "=r"(xi[1]), "=r"(xi[3])
              : "l"(xs));
  #else
-        GGML_UNUSED(t);
-        GGML_UNUSED(xs0);
-        GGML_UNUSED(stride);
+        GGML_UNUSED_VARS(t, xs0, stride);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -315,9 +313,7 @@ namespace ggml_cuda_mma {
              : "r"(A.x[1]), "r"(B.x[0]));
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -345,9 +341,7 @@ namespace ggml_cuda_mma {
              : "r"(A.x[3]), "r"(B.x[1]));
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -372,9 +366,7 @@ namespace ggml_cuda_mma {
              : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[1]));
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -408,9 +400,7 @@ namespace ggml_cuda_mma {
              : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[3]));
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -425,9 +415,7 @@ namespace ggml_cuda_mma {
              : "+r"(Dxi[0]), "+r"(Dxi[1]), "+r"(Dxi[2]), "+r"(Dxi[3])
              : "r"(Axi[0]), "r"(Axi[1]), "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[0]), "r"(Bxi[1]));
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // AMPERE_MMA_AVAILABLE
      }
@@ -452,9 +440,7 @@ namespace ggml_cuda_mma {
              : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[1]));
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -469,9 +455,7 @@ namespace ggml_cuda_mma {
              : "+r"(Dxi[0]), "+r"(Dxi[1]), "+r"(Dxi[2]), "+r"(Dxi[3])
              : "r"(Axi[0]), "r"(Axi[1]), "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[0]), "r"(Bxi[1]));
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // AMPERE_MMA_AVAILABLE
      }
@@ -505,9 +489,7 @@ namespace ggml_cuda_mma {
              : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[3]));
  #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // TURING_MMA_AVAILABLE
      }
@@ -533,9 +515,7 @@ namespace ggml_cuda_mma {
                                                        0, 0, 0);
  #endif // defined(CDNA3)
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // AMD_MFMA_AVAILABLE
      }
@@ -561,9 +541,7 @@ namespace ggml_cuda_mma {
                                                       0, 0, 0);
  #endif // defined(CDNA3)
  #else
-        GGML_UNUSED(D);
-        GGML_UNUSED(A);
-        GGML_UNUSED(B);
+        GGML_UNUSED_VARS(D, A, B);
          NO_DEVICE_CODE;
  #endif // AMD_MFMA_AVAILABLE
      }
diff --git a/src/ggml-cuda/mmf.cu b/src/ggml-cuda/mmf.cu

index 5c66fe5bb13b8fec3ff6ea1ea204214446996939..cfa5c5cce2c23dd4b0f3375edccf146955492f7c 100644 (file)
--- a/src/ggml-cuda/mmf.cu
+++ b/src/ggml-cuda/mmf.cu
@@ -132,11 +132,11 @@ static __global__ void mul_mat_f(
          dst[j*stride_col_dst + row0 + threadIdx.x] = sum;
      }
  #else
+    GGML_UNUSED_VARS(x, y, ids, dst,
+        ncols, nchannels_y, stride_row, stride_col_y, stride_col_dst,
+        channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst,
+        sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst);
      NO_DEVICE_CODE;
-    GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(ids); GGML_UNUSED(dst);
-    GGML_UNUSED(ncols); GGML_UNUSED(nchannels_y); GGML_UNUSED(stride_row); GGML_UNUSED(stride_col_y); GGML_UNUSED(stride_col_dst);
-    GGML_UNUSED(channel_ratio); GGML_UNUSED(stride_channel_x); GGML_UNUSED(stride_channel_y); GGML_UNUSED(stride_channel_dst);
-    GGML_UNUSED(sample_ratio); GGML_UNUSED(stride_sample_x); GGML_UNUSED(stride_sample_y); GGML_UNUSED(stride_sample_dst);
  #endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA)
  }
  
diff --git a/src/ggml-cuda/mmq.cu b/src/ggml-cuda/mmq.cu

index 384ee7615f7a4c9a48fa0f1202cdff9348bcf8c5..576032a0ce0dd49c7c1755a3c7bb8cb50b7fda1c 100644 (file)
--- a/src/ggml-cuda/mmq.cu
+++ b/src/ggml-cuda/mmq.cu
@@ -266,10 +266,7 @@ void ggml_cuda_op_mul_mat_q(
  
      ggml_cuda_mul_mat_q_switch_type(ctx, args, stream);
  
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_ddf_i);
-    GGML_UNUSED(src1_padded_row_size);
+    GGML_UNUSED_VARS(src1, dst, src1_ddf_i, src1_padded_row_size);
  }
  
  bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
diff --git a/src/ggml-cuda/mmq.cuh b/src/ggml-cuda/mmq.cuh

index c22907d404494ff3b344d86c4d8eed20f43a9b89..650f7080677ad9f0bb795142e29ddffbf0eaecd9 100644 (file)
--- a/src/ggml-cuda/mmq.cuh
+++ b/src/ggml-cuda/mmq.cuh
@@ -1255,7 +1255,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma(
          }
      }
  #else
-    GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); GGML_UNUSED(k00);
+    GGML_UNUSED_VARS(x, y, sum, k00);
      NO_DEVICE_CODE;
  #endif // AMD_MFMA_AVAILABLE
  }
@@ -1572,7 +1572,7 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma(
          }
      }
  #else
-    GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); GGML_UNUSED(k00);
+    GGML_UNUSED_VARS(x, y, sum, k00);
      NO_DEVICE_CODE;
  #endif // AMD_MFMA_AVAILABLE
  }
@@ -2301,7 +2301,7 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma(
          }
      }
  #else
-    GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); GGML_UNUSED(k00);
+    GGML_UNUSED_VARS(x, y, sum, k00);
      NO_DEVICE_CODE;
  #endif // AMD_MFMA_AVAILABLE
  }
diff --git a/src/ggml-cuda/mmvf.cu b/src/ggml-cuda/mmvf.cu

index 16100b680456a5c4a3be9e9485576469c02e495c..5b21ef05b3c3595a867c0b2b47c06f3004157231 100644 (file)
--- a/src/ggml-cuda/mmvf.cu
+++ b/src/ggml-cuda/mmvf.cu
@@ -433,12 +433,7 @@ void ggml_cuda_op_mul_mat_vec_f(
              GGML_ABORT("unsupported type: %s", ggml_type_name(src0->type));
      }
  
-    GGML_UNUSED(ctx);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_ddq_i);
-    GGML_UNUSED(src1_ncols);
-    GGML_UNUSED(src1_padded_row_size);
+    GGML_UNUSED_VARS(ctx, src1, dst, src1_ddq_i, src1_ncols, src1_padded_row_size);
  }
  
  bool ggml_cuda_should_use_mmvf(enum ggml_type type, int cc, const int64_t * src0_ne, int64_t ne11) {
diff --git a/src/ggml-cuda/mmvq.cu b/src/ggml-cuda/mmvq.cu

index 5c8e5c4a7eeb589184137b37467327c06f94a8ac..b7c3079308e3f473e3e3fb3964f8157971ee1362 100644 (file)
--- a/src/ggml-cuda/mmvq.cu
+++ b/src/ggml-cuda/mmvq.cu
@@ -596,9 +596,5 @@ void ggml_cuda_op_mul_mat_vec_q(
          src0_dd_i, src0->type, src1_ddq_i, nullptr, dst_dd_i, ne00, row_diff, src1_ncols, stride_row_x, stride_col_y, nrows_dst,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, stream);
  
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_ddf_i);
-    GGML_UNUSED(src1_ncols);
-    GGML_UNUSED(src1_padded_row_size);
+    GGML_UNUSED_VARS(src1, dst, src1_ddf_i, src1_ncols, src1_padded_row_size);
  }
author	R0CKSTAR <redacted>
	Thu, 21 Aug 2025 03:06:05 +0000 (11:06 +0800)
committer	Georgi Gerganov <redacted>
	Fri, 5 Sep 2025 09:54:00 +0000 (12:54 +0300)
include/ggml.h		patch | blob | history
src/ggml-cuda/conv-transpose-1d.cu		patch | blob | history
src/ggml-cuda/convert.cu		patch | blob | history
src/ggml-cuda/cpy.cu		patch | blob | history
src/ggml-cuda/fattn-mma-f16.cuh		patch | blob | history
src/ggml-cuda/fattn-tile-f16.cu		patch | blob | history
src/ggml-cuda/fattn-tile-f32.cu		patch | blob | history
src/ggml-cuda/fattn-vec-f16.cuh		patch | blob | history
src/ggml-cuda/fattn-vec-f32.cuh		patch | blob | history
src/ggml-cuda/fattn-wmma-f16.cu		patch | blob | history
src/ggml-cuda/ggml-cuda.cu		patch | blob | history
src/ggml-cuda/mma.cuh		patch | blob | history
src/ggml-cuda/mmf.cu		patch | blob | history
src/ggml-cuda/mmq.cu		patch | blob | history
src/ggml-cuda/mmq.cuh		patch | blob | history
src/ggml-cuda/mmvf.cu		patch | blob | history
src/ggml-cuda/mmvq.cu		patch | blob | history