#include "ggml-cuda.h"
#include "ggml.h"
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// Compile-time guard: the build fails with "wrong fp16 size" unless `half`
// and `ggml_fp16_t` occupy the same number of bytes.
// NOTE(review): presumably this allows buffers of one type to be treated as
// the other without conversion — confirm against the call sites.
static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
#define CUDA_CHECK(err) \
#include "ggml.h"
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
// Block-size constant, fixed at 32.
// NOTE(review): presumably the work-group size used by the DMMV kernels —
// the users of this macro are not visible here, confirm before changing it.
#define CL_DMMV_BLOCK_SIZE 32
// Turns its whole variadic argument list into one string literal using the
// preprocessor `#` (stringification) operator. Whitespace between tokens,
// including line breaks, is collapsed to single spaces in the resulting string.
#define MULTILINE_QUOTE(...) #__VA_ARGS__
vram_scratch = n_batch * MB;
ggml_cuda_set_scratch_size(vram_scratch);
if (n_gpu_layers > 0) {
- fprintf(stderr, "%s: allocating batch_size x 1 MB = %ld MB VRAM for the scratch buffer\n",
+ fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n",
__func__, vram_scratch / MB);
}
}