Windows nvcc workaround (#1753)

author Johannes Gäßler <redacted>

Fri, 9 Jun 2023 11:58:15 +0000 (13:58 +0200)

committer GitHub <redacted>

Fri, 9 Jun 2023 11:58:15 +0000 (13:58 +0200)
author Johannes Gäßler <redacted>
Fri, 9 Jun 2023 11:58:15 +0000 (13:58 +0200)
committer GitHub <redacted>
Fri, 9 Jun 2023 11:58:15 +0000 (13:58 +0200)
diff --git a/ggml-cuda.cu b/ggml-cuda.cu

index b1e513bc9d5f968d9eded1b36669c10388ffd684..a62f26e1e6126368d3a62253d7e34b9e5661b075 100644 (file)
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -1512,6 +1512,14 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
                          i01_high = row_high % ne01;
                      }
                  }
+
+                // There may be a bug in the Windows nvcc compiler related to instruction reordering or to local variables being optimized out.
+                // Removing the first assert or changing the order of the arguments causes the second assert to fail.
+                // Removing both asserts results in i01_high becoming 0, which in turn results in garbage output.
+                // The root cause seems to be i0_offset_high becoming 0 when it should always be > 0 on a single GPU.
+                GGML_ASSERT(i01_low == 0 || g_device_count > 1);
+                GGML_ASSERT(i01_high == ne01 || g_device_count > 1);
+
                  const int64_t i01_diff = i01_high - i01_low;
                  if (i01_diff == 0) {
                      continue;
@@ -1727,6 +1735,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
              row_low -= row_low % GGML_CUDA_DMMV_Y;
              row_high = id == g_device_count - 1 ? nrows : nrows*g_tensor_split[id + 1];
              row_high -= row_high % GGML_CUDA_DMMV_Y;
+            GGML_ASSERT(nrows % GGML_CUDA_DMMV_Y == 0);
          } else {
              GGML_ASSERT(false);
          }
author	Johannes Gäßler <redacted>
	Fri, 9 Jun 2023 11:58:15 +0000 (13:58 +0200)
committer	GitHub <redacted>
	Fri, 9 Jun 2023 11:58:15 +0000 (13:58 +0200)