Skip to content

Commit 5c9f90c

Browse files
authored
swift : fix prompt tokenization logic (#4321)
1 parent 4fa44e8 commit 5c9f90c

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

examples/batched.swift/Sources/main.swift

+3-2
Original file line number | Diff line number | Diff line change
@@ -215,9 +215,10 @@ print("decoded \(n_decode) tokens in \(String(format: "%.2f", Double(t_main_end
215215
llama_print_timings(context)
216216

217217
private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
218-
let n_tokens = text.count + (add_bos ? 1 : 0)
218+
let utf8Count = text.utf8.count
219+
let n_tokens = utf8Count + (add_bos ? 1 : 0)
219220
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
220-
let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false)
221+
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false)
221222
var swiftTokens: [llama_token] = []
222223
for i in 0 ..< tokenCount {
223224
swiftTokens.append(tokens[Int(i)])

examples/llama.swiftui/llama.cpp.swift/LibLlama.swift

+3-2
Original file line number | Diff line number | Diff line change
@@ -147,9 +147,10 @@ actor LlamaContext {
147147
}
148148

149149
private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
150-
let n_tokens = text.count + (add_bos ? 1 : 0)
150+
let utf8Count = text.utf8.count
151+
let n_tokens = utf8Count + (add_bos ? 1 : 0)
151152
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
152-
let tokenCount = llama_tokenize(model, text, Int32(text.count), tokens, Int32(n_tokens), add_bos, false)
153+
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
153154

154155
var swiftTokens: [llama_token] = []
155156
for i in 0..<tokenCount {

0 commit comments

Comments
 (0)