* cuda : fix bounds check for src0 rows in MMVQ kernel
* Update ggml-cuda/mmvq.cu
Co-authored-by: Johannes Gäßler <redacted>
---------
Co-authored-by: Johannes Gäßler <redacted>
tmp[j][i] = warp_reduce_sum(tmp[j][i]);
}
- if (threadIdx.x < rows_per_cuda_block) {
+ if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
}
}