struct Stats {
std::vector<float> values;
+ std::vector<int> counts;
int ncall = 0;
};
auto & e = m_stats[wname];
++e.ncall;
- // NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger
- // using the following line, we can correct for that if needed by replacing the line above with:
- //if (idx == t->src[0]->ne[0] - 1) ++e.ncall;
if (e.values.empty()) {
e.values.resize(src1->ne[0]*n_as, 0);
+ e.counts.resize(src1->ne[0]*n_as, 0);
}
else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
for (int j = 0; j < (int)src1->ne[0]; ++j) {
e.values[e_start + j] += x[j]*x[j];
+ e.counts[e_start + j]++;
}
}
}
auto& e = m_stats[wname];
if (e.values.empty()) {
e.values.resize(src1->ne[0], 0);
+ e.counts.resize(src1->ne[0], 0);
}
else if (e.values.size() != (size_t)src1->ne[0]) {
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]);
const float * x = data + row * src1->ne[0];
for (int j = 0; j < (int)src1->ne[0]; ++j) {
e.values[j] += x[j]*x[j];
+ e.counts[j]++;
}
}
if (e.ncall > m_last_call) {
out.write((const char *) &p.second.ncall, sizeof(p.second.ncall));
int nval = p.second.values.size();
out.write((const char *) &nval, sizeof(nval));
- if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
+ if (nval > 0) {
+ std::vector<float> tmp(nval);
+ for (int i = 0; i < nval; i++) {
+ tmp[i] = (p.second.values[i] / static_cast<float>(p.second.counts[i])) * static_cast<float>(p.second.ncall);
+ }
+ out.write((const char*)tmp.data(), nval*sizeof(float));
+ }
}
// Write the number of call the matrix was computed with
imatrix_data = {};
return false;
}
- e.values.resize(nval);
- in.read((char*)e.values.data(), nval*sizeof(float));
+
+ // When re-called from load_imatrix() with add set, this will already be created.
+ if (e.values.empty()) {
+ e.values.resize(nval, 0);
+ e.counts.resize(nval, 0);
+ }
+
+ std::vector<float> tmp(nval);
+ in.read((char*)tmp.data(), nval*sizeof(float));
if (in.fail()) {
printf("%s: failed reading data for entry %d\n",__func__,i);
imatrix_data = {};
return false;
}
- e.ncall = ncall;
+
+ // Recreate the state as expected by save_imatrix(), and corerct for weighted sum.
+ for (int i = 0; i < nval; i++) {
+ e.values[i] += tmp[i];
+ e.counts[i] += ncall;
+ }
+ e.ncall += ncall;
+
}
return true;
}