struct ggml_allocr * allocr = NULL;
// allocate the compute buffer
{
- // alignment required by the backend
- size_t align = ggml_backend_get_alignment(model.backend);
- allocr = ggml_allocr_new_measure(align);
+ // create an allocator to measure the memory usage
+ allocr = ggml_allocr_new_measure_from_backend(model.backend);
// create the worst-case graph for memory usage estimation
int n_tokens = std::min(model.hparams.n_ctx, params.n_batch);
// prepare required memory and allocate the compute buffer
struct ggml_allocr * allocr = NULL;
{
- // alignment required by the backend
- size_t align = ggml_backend_get_alignment(model.backend);
- allocr = ggml_allocr_new_measure(align);
+ // create an allocator to measure the memory usage
+ allocr = ggml_allocr_new_measure_from_backend(model.backend);
batch.n_tokens = n_batch_max;
struct ggml_allocr * allocr = NULL;
{
- size_t align = ggml_backend_get_alignment(model.backend);
- allocr = ggml_allocr_new_measure(align);
+ allocr = ggml_allocr_new_measure_from_backend(model.backend);
// create the worst-case graph for memory usage estimation
struct ggml_cgraph * gf = build_graph(model, allocr);
struct ggml_allocr * allocr = NULL;
{
- size_t align = ggml_backend_get_alignment(model.backend);
- allocr = ggml_allocr_new_measure(align);
+ allocr = ggml_allocr_new_measure_from_backend(model.backend);
// create the worst-case graph for memory usage estimation
struct ggml_cgraph * gf = build_graph(model, allocr);
struct ggml_allocr * allocr = NULL;
{
- size_t align = ggml_backend_get_alignment(model.backend);
- allocr = ggml_allocr_new_measure(align);
+ allocr = ggml_allocr_new_measure_from_backend(model.backend);
// create the worst-case graph for memory usage estimation
struct ggml_cgraph * gf = build_graph(model, allocr);