io.write(&v_trans, sizeof(v_trans));
io.write(&n_layer, sizeof(n_layer));
- std::vector<uint8_t> tmp_buf;
-
// Iterate and write all the keys first, each row is a cell
// Get whole range at a time
for (const auto & layer : layers) {
const uint64_t k_size_row = ggml_row_size(k->type, n_embd_k_gqa);
io.write(&k_size_row, sizeof(k_size_row));
- // Read each range of cells of k_size length each into tmp_buf and write out
+ // Read each range of cells of k_size length and write out
for (const auto & range : cr.data) {
const size_t range_size = range.second - range.first;
const size_t buf_size = range_size * k_size_row;
const uint64_t v_size_row = ggml_row_size(v->type, n_embd_v_gqa);
io.write(&v_size_row, sizeof(v_size_row));
- // Read each range of cells of v_size length each into tmp_buf and write out
+ // Read each range of cells of v_size length and write out
for (const auto & range : cr.data) {
const size_t range_size = range.second - range.first;
const size_t buf_size = range_size * v_size_row;
// For each row, we get the element values of each cell
for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {
- // Read each range of cells of v_size_el length each into tmp_buf and write out
+ // Read each range of cells of v_size_el length and write out
for (const auto & range : cr.data) {
const size_t range_size = range.second - range.first;
const size_t src_offset = (range.first + j * kv_size) * v_size_el;
io.write(&s_trans, sizeof(s_trans));
io.write(&n_layer, sizeof(n_layer));
- std::vector<uint8_t> tmp_buf;
-
// Iterate and write all the keys first, each row is a cell
// Get whole range at a time
for (uint32_t il = 0; il < n_layer; ++il) {
const uint64_t r_size_row = ggml_row_size(r_l[il]->type, hparams.n_embd_r());
io.write(&r_size_row, sizeof(r_size_row));
- // Read each range of cells of k_size length each into tmp_buf and write out
+ // Read each range of cells of r_size length and write out
for (const auto & range : cell_ranges) {
const size_t range_size = range.second - range.first;
const size_t buf_size = range_size * r_size_row;
const uint64_t s_size_row = ggml_row_size(s_l[il]->type, hparams.n_embd_s());
io.write(&s_size_row, sizeof(s_size_row));
- // Read each range of cells of s_size length each into tmp_buf and write out
+ // Read each range of cells of s_size length and write out
for (const auto & range : cell_ranges) {
const size_t range_size = range.second - range.first;
const size_t buf_size = range_size * s_size_row;
// For each row, we get the element values of each cell
for (uint32_t j = 0; j < n_embd_s; ++j) {
- // Read each range of cells of v_size_el length each into tmp_buf and write out
+ // Read each range of cells of s_size_el length and write out
for (const auto & range : cell_ranges) {
const size_t range_size = range.second - range.first;
const size_t src_offset = (range.first + j * mem_size) * s_size_el;