ggml_backend_sched_t sched;
{
// initialize the scheduler
- sched = ggml_backend_sched_new(model.backends.data(), NULL, model.backends.size(), GPT2_MAX_NODES, false);
+ sched = ggml_backend_sched_new(model.backends.data(), NULL, model.backends.size(), GPT2_MAX_NODES, false, true);
// create the worst case graph for memory usage estimation
int n_tokens = std::min(model.hparams.n_ctx, params.n_batch);
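// Illustration only, not part of the diff: the worst-case graph is then used to reserve the
// scheduler's compute buffers up front. gpt2_graph() stands in for the example's graph
// builder; its exact signature is assumed here.
struct ggml_cgraph * gf_worst = gpt2_graph(model, n_tokens); // hypothetical call
ggml_backend_sched_reserve(sched, gf_worst);                 // pre-allocates buffers on all backends
for (ggml_backend_t backend : model.backends) {
const size_t size = ggml_backend_sched_get_buffer_size(sched, backend);
fprintf(stderr, "%s: %8.2f MiB compute buffer\n", ggml_backend_name(backend), size/1024.0/1024.0);
}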
void mnist_model_build(mnist_model & model) {
if (model.arch == "mnist-fc") {
- ggml_set_param(model.ctx_compute, model.fc1_weight);
- ggml_set_param(model.ctx_compute, model.fc1_bias);
- ggml_set_param(model.ctx_compute, model.fc2_weight);
- ggml_set_param(model.ctx_compute, model.fc2_bias);
+ ggml_set_param(model.fc1_weight);
+ ggml_set_param(model.fc1_bias);
+ ggml_set_param(model.fc2_weight);
+ ggml_set_param(model.fc2_bias);
ggml_tensor * fc1 = ggml_relu(model.ctx_compute, ggml_add(model.ctx_compute,
ggml_mul_mat(model.ctx_compute, model.fc1_weight, model.images),
model.fc1_bias));
model.logits = ggml_add(model.ctx_compute,
ggml_mul_mat(model.ctx_compute, model.fc2_weight, fc1),
model.fc2_bias);
} else if (model.arch == "mnist-cnn") {
- ggml_set_param(model.ctx_compute, model.conv1_kernel);
- ggml_set_param(model.ctx_compute, model.conv1_bias);
- ggml_set_param(model.ctx_compute, model.conv2_kernel);
- ggml_set_param(model.ctx_compute, model.conv2_bias);
- ggml_set_param(model.ctx_compute, model.dense_weight);
- ggml_set_param(model.ctx_compute, model.dense_bias);
+ ggml_set_param(model.conv1_kernel);
+ ggml_set_param(model.conv1_bias);
+ ggml_set_param(model.conv2_kernel);
+ ggml_set_param(model.conv2_bias);
+ ggml_set_param(model.dense_weight);
+ ggml_set_param(model.dense_bias);
struct ggml_tensor * images_2D = ggml_reshape_4d(model.ctx_compute, model.images, MNIST_HW, MNIST_HW, 1, model.images->ne[1]);
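// Illustration only: ggml_set_param() no longer takes the compute context, it simply flags an
// input tensor as a trainable parameter so that gradients are tracked for it. ctx, n_in, n_out
// and cur are assumed names for this sketch.
struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_in, n_out);
ggml_set_param(w);               // was: ggml_set_param(ctx, w)
cur = ggml_mul_mat(ctx, w, cur); // the tensor is then used in the graph as before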
ggml_opt_result_t mnist_model_eval(mnist_model & model, ggml_opt_dataset_t dataset) {
ggml_opt_result_t result = ggml_opt_result_init();
- ggml_opt_params params = ggml_opt_default_params(model.backend_sched, model.ctx_compute, model.images, model.logits, GGML_OPT_LOSS_TYPE_CROSS_ENTROPY);
+ ggml_opt_params params = ggml_opt_default_params(model.backend_sched, GGML_OPT_LOSS_TYPE_CROSS_ENTROPY);
params.build_type = GGML_OPT_BUILD_TYPE_FORWARD;
ggml_opt_context_t opt_ctx = ggml_opt_init(params);
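// Illustration only (assumed continuation of the evaluation): once the forward passes have
// been accumulated into result, the metrics can be read back and the objects freed.
double loss, loss_unc, accuracy, accuracy_unc;
ggml_opt_result_loss    (result, &loss,     &loss_unc);
ggml_opt_result_accuracy(result, &accuracy, &accuracy_unc);
ggml_opt_result_free(result);
ggml_opt_free(opt_ctx);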
int wasm_eval(uint8_t * digitPtr) {
std::vector<float> digit(digitPtr, digitPtr + MNIST_NINPUT);
- ggml_opt_dataset_t dataset = ggml_opt_dataset_init(MNIST_NINPUT, MNIST_NCLASSES, 1, 1);
+ ggml_opt_dataset_t dataset = ggml_opt_dataset_init(GGML_TYPE_F32, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NCLASSES, 1, 1);
struct ggml_tensor * data = ggml_opt_dataset_data(dataset);
memcpy(data->data, digitPtr, ggml_nbytes(data));
ggml_set_zero(ggml_opt_dataset_labels(dataset)); // The labels are not needed.
}
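// Illustration only: assuming the single-image dataset above is evaluated into a
// ggml_opt_result_t named result (as in mnist_model_eval), the predicted class for the one
// datapoint can be read back like this.
int32_t pred = -1;
ggml_opt_result_pred(result, &pred); // one prediction, since the dataset holds one image
return pred;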
// The order of the backends passed to ggml_backend_sched_new determines which backend is given priority.
- backend_sched = ggml_backend_sched_new(backends.data(), nullptr, backends.size(), GGML_DEFAULT_GRAPH_SIZE, false);
+ backend_sched = ggml_backend_sched_new(backends.data(), nullptr, backends.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
fprintf(stderr, "%s: using %s (%s) as primary backend\n",
__func__, ggml_backend_name(backends[0]), ggml_backend_dev_description(devices[0]));
if (backends.size() >= 2) {
exit(1);
}
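// Illustration only (assumed setup): to get the priority order described above, GPU devices
// are collected first and the CPU backend is appended last as a fallback.
std::vector<ggml_backend_dev_t> devices;
std::vector<ggml_backend_t>     backends;
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
devices.push_back(dev);
backends.push_back(ggml_backend_dev_init(dev, nullptr));
}
}
ggml_backend_dev_t dev_cpu = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
devices.push_back(dev_cpu);
backends.push_back(ggml_backend_dev_init(dev_cpu, nullptr));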
- ggml_opt_dataset_t dataset = ggml_opt_dataset_init(MNIST_NINPUT, MNIST_NCLASSES, MNIST_NTEST, MNIST_NBATCH_PHYSICAL);
+ ggml_opt_dataset_t dataset = ggml_opt_dataset_init(GGML_TYPE_F32, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NCLASSES, MNIST_NTEST, MNIST_NBATCH_PHYSICAL);
if (!mnist_image_load(argv[2], dataset)) {
return 1;
// The MNIST model is so small that the overhead from data shuffling is non-negligible, especially with CUDA.
// With a shard size of 10 this overhead is greatly reduced at the cost of less thorough shuffling (which does not seem to have a significant impact).
// A batch of 500 images then consists of 50 random shards of size 10 instead of 500 random shards of size 1.
- ggml_opt_dataset_t dataset = ggml_opt_dataset_init(MNIST_NINPUT, MNIST_NCLASSES, MNIST_NTRAIN, /*ndata_shard =*/ 10);
+ ggml_opt_dataset_t dataset = ggml_opt_dataset_init(GGML_TYPE_F32, GGML_TYPE_F32, MNIST_NINPUT, MNIST_NCLASSES, MNIST_NTRAIN, /*ndata_shard =*/ 10);
if (!mnist_image_load(argv[3], dataset)) {
return 1;
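// Illustration only (values assumed): with ndata_shard = 10 the shuffling granularity is a
// shard of 10 consecutive images, so a logical batch of 500 images is assembled from
// 500/10 = 50 shards. The underlying storage is unchanged:
struct ggml_tensor * data   = ggml_opt_dataset_data  (dataset); // F32, [MNIST_NINPUT,   MNIST_NTRAIN]
struct ggml_tensor * labels = ggml_opt_dataset_labels(dataset); // F32, [MNIST_NCLASSES, MNIST_NTRAIN]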
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
ggml_build_forward_expand(gf, f);
- struct ggml_cgraph * gb = ggml_graph_dup(ctx0, gf);
- ggml_build_backward_expand(ctx0, ctx0, gb, false);
+ struct ggml_cgraph * gb = ggml_graph_dup(ctx0, gf, false);
+ ggml_build_backward_expand(ctx0, gb, false);
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
ggml_graph_reset(gb);
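// Illustration only (assumed continuation): after the backward graph gb has been computed,
// the gradient of a parameter tensor x (one previously marked with ggml_set_param) is looked
// up in the graph rather than read from the tensor itself.
ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
struct ggml_tensor * gx = ggml_graph_get_grad(gb, x); // x: assumed parameter tensor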
ne[1] = rand()%4 + 1;
x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
+ ggml_set_param(x[0]);
struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
struct ggml_tensor * f = ggml_sum(ctx0, m);
ne[0] = rand()%4 + 1;
x[1] = ggml_cont(ctx0, ggml_transpose(ctx0, get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f)));
- ggml_set_param(ctx0, x[0]);
+ ggml_set_param(x[0]);
struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
struct ggml_tensor * f = ggml_sum(ctx0, m);