void keep_imatrix(int ncall) const;
};
+// Remove any prefix and suffix from a tensor name: returns the text between the first '#' and the next '#' (or up to the end of the string when there is no second '#'); a name containing no '#' is returned unchanged.
+// Example: CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
+static std::string filter_tensor_name(const char * name) {
+ std::string wname;
+ const char * p = strchr(name, '#'); // first '#', i.e. the end of the prefix (NULL if the name has no prefix)
+ if (p != NULL) {
+ p = p + 1; // step past the '#' so p points at the first character to keep
+ const char * q = strchr(p, '#'); // next '#', i.e. the start of the suffix (NULL if there is none)
+ if (q != NULL) {
+ wname = std::string(p, q - p); // copy only the characters in [p, q), excluding both '#' delimiters
+ } else {
+ wname = p; // no suffix: keep everything after the prefix
+ }
+ } else {
+ wname = name; // no '#' at all: the name is used as-is
+ }
+ return wname;
+}
+
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
GGML_UNUSED(user_data);
const struct ggml_tensor * src0 = t->src[0];
const struct ggml_tensor * src1 = t->src[1];
-
- std::string wname;
- {
- // remove any prefix and suffixes from the name
- // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
- const char * p = strchr(src0->name, '#');
- if (p != NULL) {
- p = p + 1;
- const char * q = strchr(p, '#');
- if (q != NULL) {
- wname = std::string(p, q - p);
- } else {
- wname = p;
- }
- } else {
- wname = src0->name;
- }
- }
+ std::string wname = filter_tensor_name(src0->name);
// when ask is true, the scheduler wants to know if we are interested in data from this tensor
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
// this is necessary to guarantee equal number of "ncall" for each tensor
for (int ex = 0; ex < n_as; ++ex) {
src0 = t->src[2 + ex];
+ wname = filter_tensor_name(src0->name);
auto& e = m_stats[wname];
if (e.values.empty()) {
e.values.resize(src1->ne[0], 0);