Skip to content

Commit d6bd4d4

Browse files
authored
llama : support StableLM 2 1.6B (#5052)
* llama : support StableLM 2 1.6B
* convert : fix Qwen's set_vocab wrongly naming all special tokens [PAD{id}]
* convert : refactor Qwen's set_vocab to use it for StableLM 2 too
* nix : add tiktoken to llama-python-extra
* convert : use presence of tokenizer.json to determine StableLM tokenizer loader — it's a less arbitrary heuristic than the vocab size.
1 parent 152d9d0 commit d6bd4d4

File tree

3 files changed

+79
-46
lines changed

3 files changed

+79
-46
lines changed

.devops/nix/package.nix

+1
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ let
7373
ps: [
7474
ps.numpy
7575
ps.sentencepiece
76+
ps.tiktoken
7677
ps.torchWithoutCuda
7778
ps.transformers
7879
]

convert-hf-to-gguf.py

+60-46
Original file line number | Diff line number | Diff line change
@@ -289,6 +289,58 @@ def _set_vocab_gpt2(self):
289289
special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
290290
special_vocab.add_to_gguf(self.gguf_writer)
291291

292+
def _set_vocab_qwen(self):
293+
dir_model = self.dir_model
294+
hparams = self.hparams
295+
tokens: list[bytearray] = []
296+
toktypes: list[int] = []
297+
298+
from transformers import AutoTokenizer
299+
tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
300+
vocab_size = hparams["vocab_size"]
301+
assert max(tokenizer.get_vocab().values()) < vocab_size
302+
303+
merges = []
304+
vocab = {}
305+
mergeable_ranks = tokenizer.mergeable_ranks
306+
for token, rank in mergeable_ranks.items():
307+
vocab[QwenModel.token_bytes_to_string(token)] = rank
308+
if len(token) == 1:
309+
continue
310+
merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
311+
assert len(merged) == 2
312+
merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
313+
314+
# for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
315+
added_vocab = tokenizer.special_tokens
316+
reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
317+
318+
for i in range(vocab_size):
319+
if i not in reverse_vocab:
320+
pad_token = f"[PAD{i}]".encode("utf-8")
321+
tokens.append(bytearray(pad_token))
322+
toktypes.append(gguf.TokenType.USER_DEFINED)
323+
elif reverse_vocab[i] in added_vocab:
324+
tokens.append(reverse_vocab[i])
325+
toktypes.append(gguf.TokenType.CONTROL)
326+
else:
327+
tokens.append(reverse_vocab[i])
328+
toktypes.append(gguf.TokenType.NORMAL)
329+
330+
self.gguf_writer.add_tokenizer_model("gpt2")
331+
self.gguf_writer.add_token_list(tokens)
332+
self.gguf_writer.add_token_types(toktypes)
333+
334+
special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
335+
special_vocab.merges = merges
336+
# only add special tokens when they were not already loaded from config.json
337+
if len(special_vocab.special_token_ids) == 0:
338+
special_vocab._set_special_token("bos", tokenizer.special_tokens["<|endoftext|>"])
339+
special_vocab._set_special_token("eos", tokenizer.special_tokens["<|endoftext|>"])
340+
# this one is usually not in config.json anyway
341+
special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
342+
special_vocab.add_to_gguf(self.gguf_writer)
343+
292344
def _set_vocab_sentencepiece(self):
293345
from sentencepiece import SentencePieceProcessor
294346

@@ -877,6 +929,13 @@ def write_tensors(self):
877929

878930

879931
class StableLMModel(Model):
932+
def set_vocab(self):
933+
if (self.dir_model / "tokenizer.json").is_file():
934+
self._set_vocab_gpt2()
935+
else:
936+
# StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
937+
self._set_vocab_qwen()
938+
880939
def set_gguf_parameters(self):
881940
hparams = self.hparams
882941
block_count = hparams["num_hidden_layers"]
@@ -922,52 +981,7 @@ def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None =
922981
return parts
923982

924983
def set_vocab(self):
925-
dir_model = self.dir_model
926-
hparams = self.hparams
927-
tokens: list[bytearray] = []
928-
toktypes: list[int] = []
929-
930-
from transformers import AutoTokenizer
931-
tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
932-
vocab_size = hparams["vocab_size"]
933-
assert max(tokenizer.get_vocab().values()) < vocab_size
934-
935-
merges = []
936-
vocab = {}
937-
mergeable_ranks = tokenizer.mergeable_ranks
938-
for token, rank in mergeable_ranks.items():
939-
vocab[self.token_bytes_to_string(token)] = rank
940-
if len(token) == 1:
941-
continue
942-
merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
943-
assert len(merged) == 2
944-
merges.append(' '.join(map(self.token_bytes_to_string, merged)))
945-
946-
reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in vocab.items()}
947-
added_vocab = tokenizer.special_tokens
948-
949-
for i in range(vocab_size):
950-
if i not in reverse_vocab:
951-
pad_token = f"[PAD{i}]".encode("utf-8")
952-
tokens.append(bytearray(pad_token))
953-
toktypes.append(gguf.TokenType.USER_DEFINED)
954-
elif reverse_vocab[i] in added_vocab:
955-
tokens.append(reverse_vocab[i])
956-
toktypes.append(gguf.TokenType.CONTROL)
957-
else:
958-
tokens.append(reverse_vocab[i])
959-
toktypes.append(gguf.TokenType.NORMAL)
960-
961-
self.gguf_writer.add_tokenizer_model("gpt2")
962-
self.gguf_writer.add_token_list(tokens)
963-
self.gguf_writer.add_token_types(toktypes)
964-
965-
special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
966-
special_vocab.merges = merges
967-
special_vocab._set_special_token("bos", tokenizer.special_tokens["<|endoftext|>"])
968-
special_vocab._set_special_token("eos", tokenizer.special_tokens["<|endoftext|>"])
969-
special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
970-
special_vocab.add_to_gguf(self.gguf_writer)
984+
self._set_vocab_qwen()
971985

972986
def set_gguf_parameters(self):
973987
self.gguf_writer.add_name("Qwen")

llama.cpp

+18
Original file line number | Diff line number | Diff line change
@@ -2877,6 +2877,7 @@ static void llm_load_hparams(
28772877
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
28782878

28792879
switch (hparams.n_layer) {
2880+
case 24: model.type = e_model::MODEL_1B; break;
28802881
case 32: model.type = e_model::MODEL_3B; break;
28812882
default: model.type = e_model::MODEL_UNKNOWN;
28822883
}
@@ -3700,6 +3701,11 @@ static bool llm_load_tensors(
37003701
layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa});
37013702
layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
37023703

3704+
// optional bias tensors, present in Stable LM 2 1.6B
3705+
layer.bq = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, false);
3706+
layer.bk = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, false);
3707+
layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, false);
3708+
37033709
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
37043710
layer.ffn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd});
37053711

@@ -5598,12 +5604,24 @@ struct llm_build_context {
55985604
// compute Q and K and RoPE them
55995605
struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur);
56005606
cb(Qcur, "Qcur", il);
5607+
if (model.layers[il].bq) {
5608+
Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
5609+
cb(Qcur, "Qcur", il);
5610+
}
56015611

56025612
struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur);
56035613
cb(Kcur, "Kcur", il);
5614+
if (model.layers[il].bk) {
5615+
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
5616+
cb(Kcur, "Kcur", il);
5617+
}
56045618

56055619
struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur);
56065620
cb(Vcur, "Vcur", il);
5621+
if (model.layers[il].bv) {
5622+
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
5623+
cb(Vcur, "Vcur", il);
5624+
}
56075625

56085626
Qcur = ggml_rope_custom(
56095627
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,

0 commit comments

Comments (0)