cmake --build . --config Release
```
+Note: Because llama.cpp uses multiple CUDA streams for matrix multiplication, results [are not guaranteed to be reproducible](https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility). If you need reproducibility, set `GGML_CUDA_MAX_STREAMS` in the file `ggml-cuda.cu` to 1.
+
### Prepare Data & Run
```bash
        arg = argv[i];
        if (arg == "-s" || arg == "--seed") {
+#if defined(GGML_USE_CUBLAS)
+            fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n");
+#endif
            if (++i >= argc) {
                invalid_param = true;
                break;
    CUDA_CHECK(cudaFree(ptr));
}
-#define GGML_CUDA_MAX_STREAMS 8
+#define GGML_CUDA_MAX_STREAMS 8 // Set this to 1 for reproducible matrix multiplication.
#define GGML_CUDA_MAX_EVENTS 64
static cublasHandle_t g_cublasH = nullptr;
static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_STREAMS] = { nullptr };
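
For context on the note above: `ggml-cuda.cu` keeps a small pool of CUDA streams (the `g_cudaStreams` array shown in the last hunk) and distributes matrix-multiplication work across it. The sketch below is an illustration under assumptions rather than the actual llama.cpp code (the `init_streams` and `pick_stream` helpers are hypothetical); it shows the general round-robin pattern and why shrinking the pool to one stream serializes all cuBLAS calls, which is what makes `GGML_CUDA_MAX_STREAMS 1` restore reproducibility.

```cpp
// Illustrative sketch only, not the actual llama.cpp implementation.
// It shows round-robin dispatch over a pool of CUDA streams; with a pool
// size of 1 every call lands on the same stream and runs in order.
#include <cuda_runtime.h>

#define GGML_CUDA_MAX_STREAMS 8 // set to 1 to force every call onto one stream

static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_STREAMS] = { nullptr };

// Hypothetical helper: create the stream pool once at startup.
static void init_streams(void) {
    for (int i = 0; i < GGML_CUDA_MAX_STREAMS; ++i) {
        cudaStreamCreateWithFlags(&g_cudaStreams[i], cudaStreamNonBlocking);
    }
}

// Hypothetical helper: work item i gets a stream round-robin. With a pool
// size of 1 the modulo always yields stream 0, so all matrix multiplications
// execute in submission order on a single stream and the multi-stream
// reproducibility caveat from the cuBLAS documentation no longer applies.
static cudaStream_t pick_stream(int i) {
    return g_cudaStreams[i % GGML_CUDA_MAX_STREAMS];
}
```

With a single stream, running generation twice with the same `--seed` should then produce identical output on the same GPU and CUDA toolkit version.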