musa: bump MUSA SDK version to rc3.1.1 (llama/11822)

author R0CKSTAR <redacted>

Thu, 13 Feb 2025 12:28:18 +0000 (20:28 +0800)

committer Georgi Gerganov <redacted>

Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)
author R0CKSTAR <redacted>
Thu, 13 Feb 2025 12:28:18 +0000 (20:28 +0800)
committer Georgi Gerganov <redacted>
Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu

index 6ea41577768c3ce9f7b904ab6d1e2292b675d06b..093ad70991b5a208f15db32c100ac91e1babfa04 100644 (file)
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -1480,12 +1480,7 @@ static void ggml_cuda_op_mul_mat(
              const size_t nbytes_data    = ggml_nbytes(src0);
              const size_t nbytes_padding = ggml_row_size(src0->type, MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
              dev[id].src0_dd = dev[id].src0_dd_alloc.alloc(ctx.pool(id), nbytes_data + nbytes_padding);
-        // TODO: remove this for MUSA once the Guilty Lockup issue is resolved
-#ifndef GGML_USE_MUSA
              CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd, 0, nbytes_data + nbytes_padding, stream));
-#else // GGML_USE_MUSA
-            CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd + nbytes_data, 0, nbytes_padding, stream));
-#endif // !GGML_USE_MUSA
          }
  
          // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
author	R0CKSTAR <redacted>
	Thu, 13 Feb 2025 12:28:18 +0000 (20:28 +0800)
committer	Georgi Gerganov <redacted>
	Thu, 27 Feb 2025 06:55:36 +0000 (08:55 +0200)