size_t row_size_pd = ggml_row_size(t->type, hex_round_up(t->ne[0], QK_Q4_0x4x2)); // extra elements for the pad
size_t row_size_rp = row_size * 2; // extra space for tmp pad (if any)
+ // Ensure we don't try to read more data than is available in the source buffer 'data'
+ // or write more than the tensor can hold.
+ const size_t total_tensor_size = (size_t)nrows * row_size;
+ const size_t n_bytes_to_copy = size < total_tensor_size ? size : total_tensor_size;
+
+ // Calculate how many full rows and how many remaining bytes we need to process.
+ const int64_t n_full_rows = n_bytes_to_copy / row_size;
+ const size_t n_rem_bytes = n_bytes_to_copy % row_size;
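+ // Worked example (hypothetical numbers): for Q4_0 with ne[0] = 64, row_size is 36
+ // bytes (two 18-byte blocks); with nrows = 4 and size = 90, n_bytes_to_copy = 90,
+ // so n_full_rows = 2 and n_rem_bytes = 18.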
+
void * buf_pd = ggml_aligned_malloc(row_size_pd);
GGML_ASSERT(buf_pd != NULL);

void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);
init_row_q4x4x2((block_q4_0 *) buf_pd, t->ne[0]); // init padded buffer to make sure the tail is all zeros
- for (int64_t i = 0; i < nrows; i++) {
+ // 1. Process all the full rows
+ for (int64_t i = 0; i < n_full_rows; i++) {
const uint8_t * src = (const uint8_t *) data + (i * row_size);
uint8_t * dst = (uint8_t *) t->data + (i * row_size);
memcpy(buf_pd, src, row_size);
repack_row_q4x4x2((uint8_t *) buf_rp, (const block_q4_0 *) buf_pd, t->ne[0]);
memcpy(dst, buf_rp, row_size);
}
+ // 2. Process the final, potentially partial, row
+ if (n_rem_bytes > 0) {
+ const int64_t i = n_full_rows;
+ const uint8_t * src = (const uint8_t *) data + (i * row_size);
+ uint8_t * dst = (uint8_t *) t->data + (i * row_size);
+
+ // Re-init the padded row: buf_pd may still hold data from the previous full row,
+ // so the tail beyond n_rem_bytes must be reset before repacking.
+ init_row_q4x4x2((block_q4_0 *) buf_pd, t->ne[0]);
+
+ // Copy only the remaining bytes from the source.
+ memcpy(buf_pd, src, n_rem_bytes);
+
+ // Repack the entire buffer (partial data + zero padding).
+ repack_row_q4x4x2((uint8_t *) buf_rp, (const block_q4_0 *) buf_pd, t->ne[0]);
+
+ // Write only the corresponding remaining bytes to the destination tensor.
+ memcpy(dst, buf_rp, n_rem_bytes);
+ }
+
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
}
size_t row_size_pd = ggml_row_size(t->type, hex_round_up(t->ne[0], QK_Q4_0x4x2)); // extra elements for the pad
size_t row_size_rp = row_size * 2; // extra space for tmp pad (if any)
+ // Ensure we don't try to copy more data than the tensor actually contains.
+ const size_t total_tensor_size = (size_t)nrows * row_size;
+ const size_t n_bytes_to_copy = size < total_tensor_size ? size : total_tensor_size;
+
+ // Calculate how many full rows and how many remaining bytes we need to process.
+ const int64_t n_full_rows = n_bytes_to_copy / row_size;
+ const size_t n_rem_bytes = n_bytes_to_copy % row_size;
+
void * buf_pd = ggml_aligned_malloc(row_size_pd);
GGML_ASSERT(buf_pd != NULL);

void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);
memset(buf_pd, 0, row_size_pd); // clear-out padded buffer to make sure the tail is all zeros
- for (int64_t i = 0; i < nrows; i++) {
+ // 1. Process all the full rows
+ for (int64_t i = 0; i < n_full_rows; i++) {
const uint8_t * src = (const uint8_t *) t->data + (i * row_size);
uint8_t * dst = (uint8_t *) data + (i * row_size);
memcpy(buf_pd, src, row_size);
unpack_row_q4x4x2((block_q4_0 *) buf_rp, (const uint8_t *) buf_pd, t->ne[0]);
memcpy(dst, buf_rp, row_size);
}
+ // 2. Process the final, potentially partial, row
+ if (n_rem_bytes > 0) {
+ const int64_t i = n_full_rows;
+ const uint8_t * src = (const uint8_t *) t->data + (i * row_size);
+ uint8_t * dst = (uint8_t *) data + (i * row_size);
+
+ // We still need to read and unpack the entire source row because quantization is block-based.
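+ // For example (hypothetical numbers): with ne[0] = 64 a row occupies 36 bytes in both
+ // the packed and plain q4_0 layouts (two 18-byte blocks); a request ending 9 bytes
+ // into this row still unpacks all 36 bytes, then copies out only the first 9.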
+ memcpy(buf_pd, src, row_size);
+ unpack_row_q4x4x2((block_q4_0 *) buf_rp, (const uint8_t *) buf_pd, t->ne[0]);
+
+ // But we only copy the remaining number of bytes to the destination.
+ memcpy(dst, buf_rp, n_rem_bytes);
+ }
+
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
}
size_t row_size_pd = ggml_row_size(t->type, hex_round_up(t->ne[0], QK_Q8_0x4x2)); // extra elements for the pad
size_t row_size_rp = row_size * 2; // extra space for tmp pad (if any)
+ // Ensure we don't try to read more data than is available in the source buffer 'data'
+ // or write more than the tensor can hold.
+ const size_t total_tensor_size = (size_t)nrows * row_size;
+ const size_t n_bytes_to_copy = size < total_tensor_size ? size : total_tensor_size;
+
+ // Calculate how many full rows and how many remaining bytes we need to process.
+ const int64_t n_full_rows = n_bytes_to_copy / row_size;
+ const size_t n_rem_bytes = n_bytes_to_copy % row_size;
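+ // Worked example (hypothetical numbers): for Q8_0 with ne[0] = 64, row_size is 68
+ // bytes (two 34-byte blocks); with nrows = 4 and size = 150, n_full_rows = 2 and
+ // n_rem_bytes = 14.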
+
void * buf_pd = ggml_aligned_malloc(row_size_pd);
GGML_ASSERT(buf_pd != NULL);

void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);
init_row_q8x4x2((block_q8_0 *) buf_pd, t->ne[0]); // init padded buffer to make sure the tail is all zeros
- for (int64_t i = 0; i < nrows; i++) {
+ // 1. Process all the full rows
+ for (int64_t i = 0; i < n_full_rows; i++) {
const uint8_t * src = (const uint8_t *) data + (i * row_size);
uint8_t * dst = (uint8_t *) t->data + (i * row_size);
memcpy(buf_pd, src, row_size);
repack_row_q8x4x2((uint8_t *) buf_rp, (const block_q8_0 *) buf_pd, t->ne[0]);
memcpy(dst, buf_rp, row_size);
}
+ // 2. Process the final, potentially partial, row
+ if (n_rem_bytes > 0) {
+ const int64_t i = n_full_rows;
+ const uint8_t * src = (const uint8_t *) data + (i * row_size);
+ uint8_t * dst = (uint8_t *) t->data + (i * row_size);
+
+ // Re-init the padded row: buf_pd may still hold data from the previous full row,
+ // so the tail beyond n_rem_bytes must be reset before repacking.
+ init_row_q8x4x2((block_q8_0 *) buf_pd, t->ne[0]);
+
+ // Copy only the remaining bytes from the source.
+ memcpy(buf_pd, src, n_rem_bytes);
+
+ // Repack the entire buffer (partial data + zero padding).
+ repack_row_q8x4x2((uint8_t *) buf_rp, (const block_q8_0 *) buf_pd, t->ne[0]);
+
+ // Write only the corresponding remaining bytes to the destination tensor.
+ memcpy(dst, buf_rp, n_rem_bytes);
+ }
+
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
}
size_t row_size_pd = ggml_row_size(t->type, hex_round_up(t->ne[0], QK_Q8_0x4x2)); // extra elements for the pad
size_t row_size_rp = row_size * 2; // extra space for tmp pad (if any)
+ // Ensure we don't try to copy more data than the tensor actually contains.
+ const size_t total_tensor_size = (size_t)nrows * row_size;
+ const size_t n_bytes_to_copy = size < total_tensor_size ? size : total_tensor_size;
+
+ // Calculate how many full rows and how many remaining bytes we need to process.
+ const int64_t n_full_rows = n_bytes_to_copy / row_size;
+ const size_t n_rem_bytes = n_bytes_to_copy % row_size;
+
void * buf_pd = ggml_aligned_malloc(row_size_pd);
GGML_ASSERT(buf_pd != NULL);

void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);
memset(buf_pd, 0, row_size_pd); // clear-out padded buffer to make sure the tail is all zeros
- for (int64_t i = 0; i < nrows; i++) {
+ // 1. Process all the full rows
+ for (int64_t i = 0; i < n_full_rows; i++) {
const uint8_t * src = (const uint8_t *) t->data + (i * row_size);
uint8_t * dst = (uint8_t *) data + (i * row_size);
memcpy(buf_pd, src, row_size);
unpack_row_q8x4x2((block_q8_0 *) buf_rp, (const uint8_t *) buf_pd, t->ne[0]);
memcpy(dst, buf_rp, row_size);
}
+ // 2. Process the final, potentially partial, row
+ if (n_rem_bytes > 0) {
+ const int64_t i = n_full_rows;
+ const uint8_t * src = (const uint8_t *) t->data + (i * row_size);
+ uint8_t * dst = (uint8_t *) data + (i * row_size);
+
+ // We still need to read and unpack the entire source row because quantization is block-based.
+ memcpy(buf_pd, src, row_size);
+ unpack_row_q8x4x2((block_q8_0 *) buf_rp, (const uint8_t *) buf_pd, t->ne[0]);
+
+ // But we only copy the remaining number of bytes to the destination.
+ memcpy(dst, buf_rp, n_rem_bytes);
+ }
+
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
}
size_t row_size_pd = ggml_row_size(t->type, hex_round_up(t->ne[0], QK_MXFP4x4x2)); // extra elements for the pad
size_t row_size_rp = row_size * 2; // extra space for tmp pad (if any)
+ // Ensure we don't try to read more data than is available in the source buffer 'data'
+ // or write more than the tensor can hold.
+ const size_t total_tensor_size = (size_t)nrows * row_size;
+ const size_t n_bytes_to_copy = size < total_tensor_size ? size : total_tensor_size;
+
+ // Calculate how many full rows and how many remaining bytes we need to process.
+ const int64_t n_full_rows = n_bytes_to_copy / row_size;
+ const size_t n_rem_bytes = n_bytes_to_copy % row_size;
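+ // Worked example (hypothetical numbers): for MXFP4 with ne[0] = 64, row_size is 34
+ // bytes (two 17-byte blocks); with nrows = 4 and size = 100, n_full_rows = 2 and
+ // n_rem_bytes = 32.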
+
void * buf_pd = ggml_aligned_malloc(row_size_pd);
GGML_ASSERT(buf_pd != NULL);

void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);
init_row_mxfp4x4x2((block_mxfp4 *) buf_pd, t->ne[0]); // init padded buffer to make sure the tail is all zeros
- for (int64_t i = 0; i < nrows; i++) {
+ // 1. Process all the full rows
+ for (int64_t i = 0; i < n_full_rows; i++) {
const uint8_t * src = (const uint8_t *) data + (i * row_size);
uint8_t * dst = (uint8_t *) t->data + (i * row_size);
memcpy(buf_pd, src, row_size);
repack_row_mxfp4x4x2((uint8_t *) buf_rp, (const block_mxfp4 *) buf_pd, t->ne[0]);
memcpy(dst, buf_rp, row_size);
}
+ // 2. Process the final, potentially partial, row
+ if (n_rem_bytes > 0) {
+ const int64_t i = n_full_rows;
+ const uint8_t * src = (const uint8_t *) data + (i * row_size);
+ uint8_t * dst = (uint8_t *) t->data + (i * row_size);
+
+ // Re-init the padded row: buf_pd may still hold data from the previous full row,
+ // so the tail beyond n_rem_bytes must be reset before repacking.
+ init_row_mxfp4x4x2((block_mxfp4 *) buf_pd, t->ne[0]);
+
+ // Copy only the remaining bytes from the source.
+ memcpy(buf_pd, src, n_rem_bytes);
+
+ // Repack the entire buffer (partial data + zero padding).
+ repack_row_mxfp4x4x2((uint8_t *) buf_rp, (const block_mxfp4 *) buf_pd, t->ne[0]);
+
+ // Write only the corresponding remaining bytes to the destination tensor.
+ memcpy(dst, buf_rp, n_rem_bytes);
+ }
+
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
}
size_t row_size_pd = ggml_row_size(t->type, hex_round_up(t->ne[0], QK_MXFP4x4x2)); // extra elements for the pad
size_t row_size_rp = row_size * 2; // extra space for tmp pad (if any)
+ // Ensure we don't try to copy more data than the tensor actually contains.
+ const size_t total_tensor_size = (size_t)nrows * row_size;
+ const size_t n_bytes_to_copy = size < total_tensor_size ? size : total_tensor_size;
+
+ // Calculate how many full rows and how many remaining bytes we need to process.
+ const int64_t n_full_rows = n_bytes_to_copy / row_size;
+ const size_t n_rem_bytes = n_bytes_to_copy % row_size;
+
void * buf_pd = ggml_aligned_malloc(row_size_pd);
GGML_ASSERT(buf_pd != NULL);

void * buf_rp = ggml_aligned_malloc(row_size_rp);
GGML_ASSERT(buf_rp != NULL);
memset(buf_pd, 0, row_size_pd); // clear-out padded buffer to make sure the tail is all zeros
- for (int64_t i = 0; i < nrows; i++) {
+ // 1. Process all the full rows
+ for (int64_t i = 0; i < n_full_rows; i++) {
const uint8_t * src = (const uint8_t *) t->data + (i * row_size);
uint8_t * dst = (uint8_t *) data + (i * row_size);
memcpy(buf_pd, src, row_size);
unpack_row_mxfp4x4x2((block_mxfp4 *) buf_rp, (const uint8_t *) buf_pd, t->ne[0]);
memcpy(dst, buf_rp, row_size);
}
+ // 2. Process the final, potentially partial, row
+ if (n_rem_bytes > 0) {
+ const int64_t i = n_full_rows;
+ const uint8_t * src = (const uint8_t *) t->data + (i * row_size);
+ uint8_t * dst = (uint8_t *) data + (i * row_size);
+
+ // We still need to read and unpack the entire source row because the format is block-based.
+ memcpy(buf_pd, src, row_size);
+ unpack_row_mxfp4x4x2((block_mxfp4 *) buf_rp, (const uint8_t *) buf_pd, t->ne[0]);
+
+ // But we only copy the remaining number of bytes to the destination to respect the size limit.
+ memcpy(dst, buf_rp, n_rem_bytes);
+ }
+
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
}
switch (tensor->type) {
case GGML_TYPE_Q4_0:
GGML_ASSERT(offset == 0);
- GGML_ASSERT(size == ggml_nbytes(tensor));
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor));
repack_q4_0_q4x4x2(tensor, data, size);
break;
case GGML_TYPE_Q8_0:
GGML_ASSERT(offset == 0);
- GGML_ASSERT(size == ggml_nbytes(tensor));
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor));
repack_q8_0_q8x4x2(tensor, data, size);
break;
case GGML_TYPE_MXFP4:
GGML_ASSERT(offset == 0);
- GGML_ASSERT(size == ggml_nbytes(tensor));
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor));
repack_mxfp4_mxfp4x4x2(tensor, data, size);
break;
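// Hedged caller-side sketch (hypothetical names; not part of this patch): with the
// relaxed asserts above, set_tensor now accepts a prefix write, e.g.
//   ggml_backend_tensor_set(t, host_buf, /*offset=*/0, /*size=*/n_prefix_bytes);
// where offset must still be 0 and n_prefix_bytes may be < ggml_nbytes(t).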
switch (tensor->type) {
case GGML_TYPE_Q4_0:
GGML_ASSERT(offset == 0);
- GGML_ASSERT(size == ggml_nbytes(tensor));
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor));
repack_q4x4x2_q4_0(data, tensor, size);
break;
case GGML_TYPE_Q8_0:
GGML_ASSERT(offset == 0);
- GGML_ASSERT(size == ggml_nbytes(tensor));
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor));
repack_q8x4x2_q8_0(data, tensor, size);
break;
case GGML_TYPE_MXFP4:
GGML_ASSERT(offset == 0);
- GGML_ASSERT(size == ggml_nbytes(tensor));
+ GGML_ASSERT(offset + size <= ggml_nbytes(tensor));
repack_mxfp4x4x2_mxfp4(data, tensor, size);
break;
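// Likewise for get_tensor (hedged sketch, hypothetical caller): a prefix of the
// packed tensor can now be read back, e.g.
//   ggml_backend_tensor_get(t, host_buf, /*offset=*/0, /*size=*/n_prefix_bytes);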