From: leejet
Date: Mon, 18 Dec 2023 16:46:10 +0000 (+0800)
Subject: cuda : fix im2col_f32_f16 (#658)
X-Git-Tag: upstream/0.0.1642~1172
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=99f3f152b2e5627c25e6f36d4b334aa44e91ff78;p=pkg%2Fggml%2Fsources%2Fggml

cuda : fix im2col_f32_f16 (#658)
---

diff --git a/src/ggml-cuda.cu b/src/ggml-cuda.cu
index 019648bd..2e07bc66 100644
--- a/src/ggml-cuda.cu
+++ b/src/ggml-cuda.cu
@@ -5259,17 +5259,17 @@ static __global__ void im2col_f32_f16(
     const int ky = (i - kd) / OW;
     const int ix = i % OW;
 
-    const int iiw = ix * s0 + kx * d0 - p0;
-    const int iih = blockIdx.y * s1 + ky * d1 - p1;
+    const int64_t iiw = ix * s0 + kx * d0 - p0;
+    const int64_t iih = blockIdx.y * s1 + ky * d1 - p1;
 
-    const int offset_dst =
+    const int64_t offset_dst =
         (blockIdx.y * OW + ix) * CHW +
         (blockIdx.z * (KW * KH) + ky * KW + kx);
 
     if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
         dst[offset_dst] = __float2half(0.0f);
     } else {
-        const int offset_src = blockIdx.z * offset_delta;
+        const int64_t offset_src = blockIdx.z * offset_delta;
         dst[offset_dst] = __float2half(x[offset_src + iih * IW + iiw]);
     }
 }