// op_params layout for FLASH_ATTN_EXT: [0] scale, [1] max_bias, [2] logit_softcap.
float logit_softcap;
memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float));
- const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
-
if (Q->ne[1] == 1) {
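// Batch size 1 (single-token decode): one column per block is enough.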
constexpr int cols_per_block = 1;
if (logit_softcap == 0.0f) {
It is intended as a fusion of the softmax->top-k->get_rows pipeline for MoE models.
*/
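/*
   A minimal sketch of the unfused ggml graph this kernel replaces (assuming a
   context `ctx`, per-token expert `logits`, and `n_expert_used`; the exact
   node layout varies per model):

       ggml_tensor * probs    = ggml_soft_max(ctx, logits);            // softmax over n_experts
       ggml_tensor * selected = ggml_top_k(ctx, probs, n_expert_used); // ids of the k best experts
       ggml_tensor * weights  = ggml_get_rows(ctx,
           ggml_reshape_3d(ctx, probs, 1, n_experts, probs->ne[1]), selected); // gather their weights
*/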
-template <size_t n_experts, bool with_norm>
+template <int n_experts, bool with_norm>
__launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float * logits,
float * weights,
int32_t * ids,
GGML_ASSERT(ids->nb[1] / ggml_type_size(ids->type) == (size_t) n_experts);
- cudaStream_t stream = ctx.stream();
-
const int n_expert_used = weights->ne[1];
if (with_norm) {
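    // Renormalize the selected top-k weights so they sum to 1 per token.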
set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)
foreach(SOURCE ${GGML_SOURCES_MUSA})
- set(COMPILE_FLAGS "-fsigned-char -x musa -mtgpu")
+ set(COMPILE_FLAGS "-Od3 -fno-strict-aliasing -ffast-math -fsigned-char -x musa -mtgpu -fmusa-flush-denormals-to-zero")
foreach(ARCH ${MUSA_ARCHITECTURES})
set(COMPILE_FLAGS "${COMPILE_FLAGS} --cuda-gpu-arch=mp_${ARCH}")
endforeach()
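# Illustrative expansion: with MUSA_ARCHITECTURES "21;22" this appends
# "--cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22" to COMPILE_FLAGS.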