There are several places where a gguf context is allocated. A call to gguf_free
is missing in some error paths. Also, on Linux, llama-bench was missing a call
to fclose.
for (int j = 0; j < ggml_nelements(cur); ++j) {
if (data[j] != 100 + i) {
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
+ gguf_free(ctx);
return false;
}
}
}
}
}
+ fclose(f);
}
#endif
// TODO: other platforms
if (!new_clip->ctx_data) {
fprintf(stderr, "%s: ggml_init() failed\n", __func__);
clip_free(new_clip);
+ gguf_free(ctx);
return nullptr;
}
if (!fin) {
printf("cannot open model file for loading tensors\n");
clip_free(new_clip);
+ gguf_free(ctx);
return nullptr;
}
if (!fin) {
printf("%s: failed to seek for tensor %s\n", __func__, name);
clip_free(new_clip);
+ gguf_free(ctx);
return nullptr;
}
int num_bytes = ggml_nbytes(cur);
break;
default:
printf("Please use an input file in f32 or f16\n");
+ gguf_free(ctx_out);
return false;
}
load_checkpoint_gguf(fctx, f_ggml_ctx, model, train);
+ gguf_free(fctx);
return true;
}