const std = @import("std");
-// Zig Version: 0.11.0-dev.3798+a5e15eced
+// Zig Version: 0.11.0-dev.3886+0c1bfe271
// Zig Build Command: zig build
-// Zig Run Command: zig build -h
-// zig build run_dolly-v2
-// zig build run_gpt-2
-// zig build run_gpt-j
-// zig build run_gpt-neox
-// zig build run_mnist
-// zig build run_mpt
-// zig build run_replit
-// zig build run_starcoder
-// zig build run_test-grad0
-// zig build run_test-mul-mat0
-// zig build run_test-mul-mat2
-// zig build run_test-opt
-// zig build run_test-vec1
-// zig build run_test0
-// zig build run_test1
-// zig build run_test2
+// Zig Run Command: zig build -h
+// zig build run_dolly-v2
+// zig build run_gpt-2
+// zig build run_gpt-j
+// zig build run_gpt-neox
+// zig build run_mnist
+// zig build run_mpt
+// zig build run_replit
+// zig build run_starcoder
+// zig build run_test-grad0
+// zig build run_test-mul-mat0
+// zig build run_test-mul-mat2
+// zig build run_test-opt
+// zig build run_test-vec1
+// zig build run_test0
+// zig build run_test1
+// zig build run_test2
// zig build run_test3
// zig build run_zig_test0
-// zig build run_zig_test1
+// zig build run_zig_test1
+// zig build run_zig_test2
+// zig build run_zig_test3
pub fn build(b: *std.build.Builder) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const zig_tests = .{
"test0",
"test1",
+ "test2",
+ "test3",
};
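// each entry in zig_tests gets its own executable and a matching `run_zig_<name>` step (wired up below)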
inline for (zig_tests) |name| {
const exe = b.addExecutable(.{
const run_step = b.step("run_zig_" ++ name, "Run zig_tests");
run_step.dependOn(&run_cmd.step);
}
-}
\ No newline at end of file
+}
const std = @import("std");\r
const c = @cImport({\r
- @cInclude("stdio.h");\r
- @cInclude("stdlib.h");\r
@cInclude("ggml/ggml.h");\r
});\r
\r
const std = @import("std");\r
const c = @cImport({\r
- @cInclude("stdio.h");\r
- @cInclude("stdlib.h");\r
@cInclude("ggml/ggml.h");\r
});\r
\r
c.ggml_graph_dump_dot(&gf, null, "test1-2-forward.dot");\r
c.ggml_graph_dump_dot(&gb, &gf, "test1-2-backward.dot");\r
}\r
- \r
+\r
///////////////////////////////////////////////////////////////\r
\r
{\r
try std.testing.expect(c.ggml_get_f32_1d(y, 0) == 12.0);\r
try std.testing.expect(c.ggml_get_f32_1d(x1.*.grad, 0) == 24.0);\r
try std.testing.expect(c.ggml_get_f32_1d(x2.*.grad, 0) == 12.0);\r
- try std.testing.expect(c.ggml_get_f32_1d(x3.*.grad, 0) == 4.0); \r
+ try std.testing.expect(c.ggml_get_f32_1d(x3.*.grad, 0) == 4.0);\r
\r
const g1 = x1.*.grad;\r
const g2 = x2.*.grad;\r
\r
c.ggml_graph_compute(ctx0, @constCast(&gbb));\r
\r
- std.debug.print("H * [1, 1, 1] = [ {d:.6} {d:.6} {d:.6}]\n", \r
- .{ \r
- c.ggml_get_f32_1d(x1.*.grad, 0), \r
+ std.debug.print("H * [1, 1, 1] = [ {d:.6} {d:.6} {d:.6}]\n",\r
+ .{\r
+ c.ggml_get_f32_1d(x1.*.grad, 0),\r
c.ggml_get_f32_1d(x2.*.grad, 0),\r
c.ggml_get_f32_1d(x3.*.grad, 0),\r
});\r
\r
try std.testing.expect(c.ggml_get_f32_1d(x1.*.grad, 0) == 56.0);\r
try std.testing.expect(c.ggml_get_f32_1d(x2.*.grad, 0) == 34.0);\r
- try std.testing.expect(c.ggml_get_f32_1d(x3.*.grad, 0) == 12.0); \r
+ try std.testing.expect(c.ggml_get_f32_1d(x3.*.grad, 0) == 12.0);\r
\r
c.ggml_graph_dump_dot(&gf, null, "test1-4-forward.dot");\r
c.ggml_graph_dump_dot(&gb, &gf, "test1-4-backward.dot");\r
c.ggml_graph_compute(ctx0, @constCast(&gb));\r
\r
std.debug.print("y = {d:.6}\n", .{c.ggml_get_f32_1d(y, 0)});\r
- std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x1.*.grad, 0),\r
c.ggml_get_f32_1d(x1.*.grad, 1),\r
c.ggml_get_f32_1d(x1.*.grad, 2),\r
});\r
- std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x2.*.grad, 0),\r
c.ggml_get_f32_1d(x2.*.grad, 1),\r
c.ggml_graph_compute(ctx0, @constCast(&gb));\r
\r
std.debug.print("y = {d:.6}\n", .{c.ggml_get_f32_1d(y, 0)});\r
- std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x1.*.grad, 0),\r
c.ggml_get_f32_1d(x1.*.grad, 1),\r
c.ggml_get_f32_1d(x1.*.grad, 2),\r
});\r
- std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x2.*.grad, 0),\r
c.ggml_get_f32_1d(x2.*.grad, 1),\r
c.ggml_graph_compute(ctx0, @constCast(&gb));\r
\r
std.debug.print("y = {d:.6}\n", .{c.ggml_get_f32_1d(y, 0)});\r
- std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x1.*.grad, 0),\r
c.ggml_get_f32_1d(x1.*.grad, 1),\r
c.ggml_get_f32_1d(x1.*.grad, 2),\r
});\r
- std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x2.*.grad, 0),\r
c.ggml_get_f32_1d(x2.*.grad, 1),\r
c.ggml_graph_compute(ctx0, @constCast(&gb));\r
\r
std.debug.print("y = {d:.6}\n", .{c.ggml_get_f32_1d(y, 0)});\r
- std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x1.*.grad, 0),\r
c.ggml_get_f32_1d(x1.*.grad, 1),\r
c.ggml_get_f32_1d(x1.*.grad, 2),\r
});\r
- std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x2.*.grad, 0),\r
c.ggml_get_f32_1d(x2.*.grad, 1),\r
c.ggml_graph_compute(ctx0, @constCast(&gb));\r
\r
std.debug.print("y = {d:.6}\n", .{c.ggml_get_f32_1d(y, 0)});\r
- std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx1 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x1.*.grad, 0),\r
c.ggml_get_f32_1d(x1.*.grad, 1),\r
c.ggml_get_f32_1d(x1.*.grad, 2),\r
});\r
- std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n", \r
+ std.debug.print("df/dx2 = {d:.6} {d:.6} {d:.6}\n",\r
.{\r
c.ggml_get_f32_1d(x2.*.grad, 0),\r
c.ggml_get_f32_1d(x2.*.grad, 1),\r
}\r
\r
_ = try std.io.getStdIn().reader().readByte();\r
-}
\ No newline at end of file
+}\r
--- /dev/null
+const std = @import("std");\r
+const Thread = std.Thread;\r
+const c = @cImport({\r
+ @cInclude("ggml/ggml.h");\r
+});\r
+\r
+fn is_close(a: f32, b: f32, epsilon: f32) bool {\r
+ return std.math.fabs(a - b) < epsilon;\r
+}\r
+\r
+pub fn main() !void {\r
+ const params = .{\r
+ .mem_size = 128*1024*1024,\r
+ .mem_buffer = null,\r
+ .no_alloc = false,\r
+ };\r
+\r
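+ // use ggml's built-in L-BFGS optimizer with its default settings; only the thread count is overridden below\r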
+ var opt_params = c.ggml_opt_default_params(c.GGML_OPT_LBFGS);\r
+\r
+ const nthreads = try Thread.getCpuCount();\r
+ opt_params.n_threads = @intCast(nthreads);\r
+ std.debug.print("test2: n_threads:{}\n", .{opt_params.n_threads});\r
+\r
+ const xi = [_]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };\r
+ const yi = [_]f32{ 15.0, 25.0, 35.0, 45.0, 55.0, 65.0, 75.0, 85.0, 95.0, 105.0 };\r
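+ // the points (xi, yi) lie exactly on y = 10*x + 5, so the fits below are expected to recover t0 ~= 5 and t1 ~= 10\r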
+\r
+ const n = xi.len;\r
+\r
+ const ctx0 = c.ggml_init(params);\r
+ defer c.ggml_free(ctx0);\r
+\r
+ const x = c.ggml_new_tensor_1d(ctx0, c.GGML_TYPE_F32, n);\r
+ const y = c.ggml_new_tensor_1d(ctx0, c.GGML_TYPE_F32, n);\r
+\r
+ for (0..n) |i| {\r
+ const x_data_pointer: [*]f32 = @ptrCast(@alignCast(x.*.data));\r
+ x_data_pointer[i] = xi[i];\r
+ const y_data_pointer: [*]f32 = @ptrCast(@alignCast(y.*.data));\r
+ y_data_pointer[i] = yi[i];\r
+ }\r
+\r
+ {\r
+ const t0 = c.ggml_new_f32(ctx0, 0.0);\r
+ const t1 = c.ggml_new_f32(ctx0, 0.0);\r
+\r
+ // initialize auto-diff parameters:\r
+ _ = c.ggml_set_param(ctx0, t0);\r
+ _ = c.ggml_set_param(ctx0, t1);\r
+\r
+ // f = sum_i[(t0 + t1*x_i - y_i)^2]/(2n)\r
+ const f =\r
+ c.ggml_div(ctx0,\r
+ c.ggml_sum(ctx0,\r
+ c.ggml_sqr(ctx0,\r
+ c.ggml_sub(ctx0,\r
+ c.ggml_add(ctx0,\r
+ c.ggml_mul(ctx0, x, c.ggml_repeat(ctx0, t1, x)),\r
+ c.ggml_repeat(ctx0, t0, x)),\r
+ y)\r
+ )\r
+ ),\r
+ c.ggml_new_f32(ctx0, @as(f32, 2.0)*n));\r
+\r
+ const res = c.ggml_opt(null, opt_params, f);\r
+\r
+ std.debug.print("t0 = {d:.6}\n", .{c.ggml_get_f32_1d(t0, 0)});\r
+ std.debug.print("t1 = {d:.6}\n", .{c.ggml_get_f32_1d(t1, 0)});\r
+\r
+ try std.testing.expect(res == c.GGML_OPT_OK);\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t0, 0), 5.0, 1e-3));\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t1, 0), 10.0, 1e-3));\r
+ }\r
+\r
+ {\r
+ const t0 = c.ggml_new_f32(ctx0, -1.0);\r
+ const t1 = c.ggml_new_f32(ctx0, 9.0);\r
+\r
+ _ = c.ggml_set_param(ctx0, t0);\r
+ _ = c.ggml_set_param(ctx0, t1);\r
+\r
+ // f = 0.5*sum_i[abs(t0 + t1*x_i - y_i)]/n\r
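+ // same data as the least-squares fit above, but with an absolute-error (L1) objective; the minimizer is unchanged and the checks below use a looser 1e-2 tolerance\r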
+ const f =\r
+ c.ggml_mul(ctx0,\r
+ c.ggml_new_f32(ctx0, @as(f32, 1.0)/(2*n)),\r
+ c.ggml_sum(ctx0,\r
+ c.ggml_abs(ctx0,\r
+ c.ggml_sub(ctx0,\r
+ c.ggml_add(ctx0,\r
+ c.ggml_mul(ctx0, x, c.ggml_repeat(ctx0, t1, x)),\r
+ c.ggml_repeat(ctx0, t0, x)),\r
+ y)\r
+ )\r
+ )\r
+ );\r
+\r
+ const res = c.ggml_opt(null, opt_params, f);\r
+\r
+ try std.testing.expect(res == c.GGML_OPT_OK);\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t0, 0), 5.0, 1e-2));\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t1, 0), 10.0, 1e-2));\r
+ }\r
+\r
+ {\r
+ const t0 = c.ggml_new_f32(ctx0, 5.0);\r
+ const t1 = c.ggml_new_f32(ctx0, -4.0);\r
+\r
+ _ = c.ggml_set_param(ctx0, t0);\r
+ _ = c.ggml_set_param(ctx0, t1);\r
+\r
+ // f = t0^2 + t1^2\r
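+ // a simple convex bowl: the optimizer should drive t0 and t1 (and hence f) to zero, as asserted below\r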
+ const f =\r
+ c.ggml_add(ctx0,\r
+ c.ggml_sqr(ctx0, t0),\r
+ c.ggml_sqr(ctx0, t1)\r
+ );\r
+\r
+ const res = c.ggml_opt(null, opt_params, f);\r
+\r
+ try std.testing.expect(res == c.GGML_OPT_OK);\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(f, 0), 0.0, 1e-3));\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t0, 0), 0.0, 1e-3));\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t1, 0), 0.0, 1e-3));\r
+ }\r
+\r
+ /////////////////////////////////////////\r
+\r
+ {\r
+ const t0 = c.ggml_new_f32(ctx0, -7.0);\r
+ const t1 = c.ggml_new_f32(ctx0, 8.0);\r
+\r
+ _ = c.ggml_set_param(ctx0, t0);\r
+ _ = c.ggml_set_param(ctx0, t1);\r
+\r
+ // f = (t0 + 2*t1 - 7)^2 + (2*t0 + t1 - 5)^2\r
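+ // this is the Booth function, whose global minimum f(1, 3) = 0 is what the checks below expect\r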
+ const f =\r
+ c.ggml_add(ctx0,\r
+ c.ggml_sqr(ctx0,\r
+ c.ggml_sub(ctx0,\r
+ c.ggml_add(ctx0,\r
+ t0,\r
+ c.ggml_mul(ctx0, t1, c.ggml_new_f32(ctx0, 2.0))),\r
+ c.ggml_new_f32(ctx0, 7.0)\r
+ )\r
+ ),\r
+ c.ggml_sqr(ctx0,\r
+ c.ggml_sub(ctx0,\r
+ c.ggml_add(ctx0,\r
+ c.ggml_mul(ctx0, t0, c.ggml_new_f32(ctx0, 2.0)),\r
+ t1),\r
+ c.ggml_new_f32(ctx0, 5.0)\r
+ )\r
+ )\r
+ );\r
+\r
+ const res = c.ggml_opt(null, opt_params, f);\r
+\r
+ try std.testing.expect(res == c.GGML_OPT_OK);\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(f, 0), 0.0, 1e-3));\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t0, 0), 1.0, 1e-3));\r
+ try std.testing.expect(is_close(c.ggml_get_f32_1d(t1, 0), 3.0, 1e-3));\r
+ }\r
+\r
+ _ = try std.io.getStdIn().reader().readByte();\r
+}\r
--- /dev/null
+const std = @import("std");\r
+const Thread = std.Thread;\r
+const c = @cImport({\r
+ @cInclude("stdlib.h");\r
+ @cInclude("ggml/ggml.h");\r
+});\r
+\r
+fn is_close(a: f32, b: f32, epsilon: f32) bool {\r
+ return std.math.fabs(a - b) < epsilon;\r
+}\r
+\r
+pub fn main() !void {\r
+ const params = .{\r
+ .mem_size = 128*1024*1024,\r
+ .mem_buffer = null,\r
+ .no_alloc = false,\r
+ };\r
+\r
+ var opt_params = c.ggml_opt_default_params(c.GGML_OPT_LBFGS);\r
+\r
+ const nthreads = try Thread.getCpuCount();\r
+ opt_params.n_threads = @intCast(nthreads);\r
+\r
+ const NP = 1 << 12;\r
+ const NF = 1 << 8;\r
+\r
+ const ctx0 = c.ggml_init(params);\r
+ defer c.ggml_free(ctx0);\r
+\r
+ const F = c.ggml_new_tensor_2d(ctx0, c.GGML_TYPE_F32, NF, NP);\r
+ const l = c.ggml_new_tensor_1d(ctx0, c.GGML_TYPE_F32, NP);\r
+\r
+ // regularization weight\r
+ const lambda = c.ggml_new_f32(ctx0, 1e-5);\r
+\r
+ c.srand(0);\r
+\r
+ const l_data_pointer: [*]f32 = @ptrCast(@alignCast(l.*.data));\r
+ const f_data_pointer: [*]f32 = @ptrCast(@alignCast(F.*.data));\r
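+ // synthetic data: the first NP/2 rows get label +1 and activate the first NF/2 features, the remaining rows get label -1 and activate the second half, plus a little uniform noise, all scaled by 1/(0.5*NF)\r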
+ for (0..NP) |j| {\r
+ const ll = if (j < NP/2) @as(f32, 1.0) else @as(f32, -1.0);\r
+ l_data_pointer[j] = ll;\r
+\r
+ for (0..NF) |i| {\r
+ const c_rand: f32 = @floatFromInt(c.rand());\r
+ f_data_pointer[j*NF + i] = \r
+ ((if (ll > 0 and i < NF/2) @as(f32, 1.0) else \r
+ if (ll < 0 and i >= NF/2) @as(f32, 1.0) else @as(f32, 0.0)) + \r
+ (c_rand/c.RAND_MAX - 0.5) * 0.1) / (0.5 * NF);\r
+ }\r
+ }\r
+\r
+ {\r
+ // initial guess\r
+ const x = c.ggml_set_f32(c.ggml_new_tensor_1d(ctx0, c.GGML_TYPE_F32, NF), 0.0);\r
+\r
+ c.ggml_set_param(ctx0, x);\r
+\r
+ // f = sum_j[(dot(F_j, x) - l_j)^2]/NP + lambda*||x||^2\r
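+ // i.e. least squares with an L2 penalty (ridge regression); by construction the recovered weights should be close to +1 on the first NF/2 features and -1 on the rest, which the final loop verifies\r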
+ const f =\r
+ c.ggml_add(ctx0,\r
+ c.ggml_div(ctx0,\r
+ c.ggml_sum(ctx0,\r
+ c.ggml_sqr(ctx0,\r
+ c.ggml_sub(ctx0,\r
+ c.ggml_mul_mat(ctx0, F, x),\r
+ l)\r
+ )\r
+ ),\r
+ c.ggml_new_f32(ctx0, @as(f32, NP))\r
+ ),\r
+ c.ggml_mul(ctx0,\r
+ c.ggml_sum(ctx0, c.ggml_sqr(ctx0, x)),\r
+ lambda)\r
+ );\r
+\r
+ const res = c.ggml_opt(null, opt_params, f);\r
+\r
+ try std.testing.expect(res == c.GGML_OPT_OK);\r
+\r
+ const x_data_pointer: [*]f32 = @ptrCast(@alignCast(x.*.data));\r
+ // print results\r
+ for (0..16) |i| {\r
+ std.debug.print("x[{d:3}] = {d:.6}\n", .{i, x_data_pointer[i]});\r
+ }\r
+ std.debug.print("...\n", .{});\r
+ for (NF - 16..NF) |i| {\r
+ std.debug.print("x[{d:3}] = {d:.6}\n", .{i, x_data_pointer[i]});\r
+ }\r
+ std.debug.print("\n", .{});\r
+\r
+ for (0..NF) |i| {\r
+ if (i < NF/2) {\r
+ try std.testing.expect(is_close(x_data_pointer[i], 1.0, 1e-2));\r
+ } else {\r
+ try std.testing.expect(is_close(x_data_pointer[i], -1.0, 1e-2));\r
+ }\r
+ }\r
+ }\r
+\r
+ _ = try std.io.getStdIn().reader().readByte();\r
+}\r