block_q4_2 * restrict y = vy;
- quantize_row_q4_2_reference(x, y, k);
+ //quantize_row_q4_2_reference(x, y, k);
// This produces the exact same format, but is a better match to the input floats ("better" as measured by RMSE)
- //quantize_row_q4_2_rmse(x, y, k);
+ quantize_row_q4_2_rmse(x, y, k);
}
static void quantize_row_q4_3_reference(const float * restrict x, block_q4_3 * restrict y, int k) {
else if (type == GGML_TYPE_Q4_2) {
dequantize_row_q_cuda = dequantize_row_q4_2_cuda;
}
+ else if (type == GGML_TYPE_Q4_3) {
+ dequantize_row_q_cuda = dequantize_row_q4_3_cuda;
+ }
else {
GGML_ASSERT(false);
}