git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
gguf-py : support lazy tensor splitting (#12809)
author compilade <redacted>
Tue, 8 Apr 2025 07:03:07 +0000 (03:03 -0400)
committer GitHub <redacted>
Tue, 8 Apr 2025 07:03:07 +0000 (09:03 +0200)
* gguf-py : support lazy tensor splitting

Splitting usually involves returning tuples of tensors,
which need to be handled properly to avoid early eager evaluation.

* gguf-py : fix flake8 lint

gguf-py/gguf/lazy.py

index 8d4fece2dca86983286a3c0de15ca86578ce4dfa..f9bcadae0224bac5f922ce6565dd1e3883a923e1 100644 (file)
@@ -139,6 +139,16 @@ class LazyBase(ABC, metaclass=LazyMeta):
 
             if isinstance(res, cls._tensor_type):
                 return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
+            elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
+                # share the evaluation between lazy tuple elements
+                shared_args: list = [args, None]
+
+                def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
+                    assert len(a) == 2
+                    if a[1] is None:
+                        a[1] = fn(*a[0], **kw)
+                    return a[1][i]
+                return tuple(cls(meta=cls.eager_to_meta(res[i]), args=(shared_args, i), kwargs=kwargs, func=eager_tuple_element) for i in range(len(res)))
             else:
                 del res  # not needed
                 # non-tensor return likely relies on the contents of the args