git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
swift : fix token_to_piece implementation (#4278)
author Miwa / Ensan <redacted>
Fri, 1 Dec 2023 18:19:45 +0000 (03:19 +0900)
committer GitHub <redacted>
Fri, 1 Dec 2023 18:19:45 +0000 (20:19 +0200)
* Fix token_to_piece implementation in Swift

* Fix errors

examples/batched.swift/Sources/main.swift
examples/llama.swiftui/llama.cpp.swift/LibLlama.swift

index ba15197aecf09da644d6fff9224c10cd97539f96..ce9d80d9b6c4acb7387bc4d08cd507ea565c6e85 100644 (file)
@@ -230,18 +230,15 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
     var result = [CChar](repeating: 0, count: 8)
     let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count))
     if nTokens < 0 {
-        if result.count >= -Int(nTokens) {
-            result.removeLast(-Int(nTokens))
-        } else {
-            result.removeAll()
-        }
+        let actualTokensCount = -Int(nTokens)
+        result = .init(repeating: 0, count: actualTokensCount)
         let check = llama_token_to_piece(
             model,
             token,
             &result,
             Int32(result.count)
         )
-        assert(check == nTokens)
+        assert(check == actualTokensCount)
     } else {
         result.removeLast(result.count - Int(nTokens))
     }
@@ -259,5 +256,4 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
         buffer = []
         return bufferString
     }
-    return nil
 }
index aaef09611bc909031dcc59e2585771e8dfd542aa..09b36d9e65b174db34e7cc45382ad7e6e77c6f26 100644 (file)
@@ -164,13 +164,21 @@ actor LlamaContext {
     private func token_to_piece(token: llama_token) -> String {
         let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
         result.initialize(repeating: Int8(0), count: 8)
-
-        let _ = llama_token_to_piece(model, token, result, 8)
-
-        let resultStr = String(cString: result)
-
-        result.deallocate()
-
-        return resultStr
+        defer {
+            result.deallocate()
+        }
+        let nTokens = llama_token_to_piece(model, token, result, 8)
+
+        if nTokens < 0 {
+            let newResult = UnsafeMutablePointer<Int8>.allocate(capacity: Int(-nTokens))
+            newResult.initialize(repeating: Int8(0), count: Int(-nTokens))
+            defer {
+                newResult.deallocate()
+            }
+            _ = llama_token_to_piece(model, token, newResult, -nTokens)
+            return String(cString: newResult)
+        } else {
+            return String(cString: result)
+        }
     }
 }