@@ -704,6 +704,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
             { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+            { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
@@ -4653,8 +4654,7 @@ static void llm_load_vocab(
             LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
             LLAMA_LOG_WARN("%s: \n", __func__);
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
-        } else if (
-                tokenizer_pre == "default") {
+        } else if (tokenizer_pre == "default") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
         } else if (
                 tokenizer_pre == "llama3" ||
@@ -4681,7 +4681,8 @@ static void llm_load_vocab(
                 tokenizer_pre == "jina-es" ||
                 tokenizer_pre == "jina-de" ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de") {
+                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "jina-v2-code") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
                 tokenizer_pre == "refact") {
@@ -5515,7 +5516,7 @@ static bool llm_load_tensors(
 
                         layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd});
                     } else {
-                        layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
+                        layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
                     }
 
                     layer.layer_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd});
@@ -5556,6 +5557,9 @@ static bool llm_load_tensors(
                     layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                     layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});
 
+                    layer.attn_norm_2 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.attn_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
                     layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                     layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
 
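Note: the two added create_tensor calls above pass llama_model_loader::TENSOR_NOT_REQUIRED, so checkpoints without a second attention norm still load and the corresponding layer fields stay null. As a rough illustration only (hypothetical map, struct, and helper names, not llama.cpp's real loader API), the lookup pattern boils down to formatting a per-block name template such as "blk.%d.attn_norm_2" and treating a missing entry as "absent" rather than as an error:

// Hypothetical sketch of optional, name-templated tensor lookup; the map,
// struct, and function names are illustrative, not llama.cpp's real loader.
#include <cstdio>
#include <map>
#include <string>

struct Tensor { int n_embd; };                 // stand-in for a real tensor object

using TensorMap = std::map<std::string, Tensor>;

// Format the per-block template for block i, append ".weight"/".bias",
// and look it up. With required == false, a missing tensor is not an
// error: the function simply returns nullptr.
static const Tensor * find_tensor(const TensorMap & tensors, const char * fmt,
                                  const char * suffix, int i, bool required) {
    char name[128];
    std::snprintf(name, sizeof(name), fmt, i);
    std::string key = std::string(name) + "." + suffix;

    auto it = tensors.find(key);
    if (it == tensors.end()) {
        if (required) {
            std::fprintf(stderr, "missing required tensor: %s\n", key.c_str());
        }
        return nullptr;                        // caller checks for nullptr, as the graph code does
    }
    return &it->second;
}

int main() {
    TensorMap tensors;
    tensors["blk.0.attn_norm_2.weight"] = Tensor{768};  // present only in some checkpoints

    const Tensor * w = find_tensor(tensors, "blk.%d.attn_norm_2", "weight", 0, /*required=*/false);
    const Tensor * b = find_tensor(tensors, "blk.%d.attn_norm_2", "bias",   0, /*required=*/false);

    std::printf("weight %s, bias %s\n", w ? "found" : "absent", b ? "found" : "absent");
}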
@@ -8519,6 +8523,11 @@ struct llm_build_context {
             // attention layer norm
             cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);
 
+            if (model.layers[il].attn_norm_2 != nullptr) {
+                cur = ggml_add(ctx0, cur, inpL); // re-add the layer input
+                cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm_2, model.layers[il].attn_norm_2_b, LLM_NORM, cb, il);
+            }
+
             struct ggml_tensor * ffn_inp = cur;
             cb(ffn_inp, "ffn_inp", il);
 
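Note: a minimal standalone sketch of the control flow this last hunk adds, using plain std::vector math instead of ggml and hypothetical names (layer_norm and post_attention are illustrative, not llama.cpp functions). The first norm is always applied; only when the optional second-norm weights were loaded is the layer input re-added and normalized again:

// Standalone sketch (assumed names, not llama.cpp's API) of the pattern above:
// a per-layer tensor that may be absent, and an extra LayerNorm that is applied
// to the residual sum only when that tensor was loaded.
#include <cmath>
#include <cstdio>
#include <optional>
#include <vector>

using Vec = std::vector<float>;

// Plain LayerNorm over one vector: y = (x - mean) / sqrt(var + eps) * w + b.
static Vec layer_norm(const Vec & x, const Vec & w, const Vec & b, float eps = 1e-5f) {
    float mean = 0.0f;
    for (float v : x) mean += v;
    mean /= x.size();

    float var = 0.0f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= x.size();

    Vec y(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        y[i] = (x[i] - mean) / std::sqrt(var + eps) * w[i] + b[i];
    }
    return y;
}

struct Layer {
    Vec attn_out_norm_w, attn_out_norm_b;
    // Optional second norm: empty optionals stand in for TENSOR_NOT_REQUIRED
    // tensors that were not present in the checkpoint.
    std::optional<Vec> attn_norm_2_w, attn_norm_2_b;
};

// Mirrors the control flow of the hunk: always apply attn_out_norm, then,
// only if the optional weights exist, re-add the layer input and normalize again.
static Vec post_attention(const Layer & l, Vec cur, const Vec & inp_layer) {
    cur = layer_norm(cur, l.attn_out_norm_w, l.attn_out_norm_b);
    if (l.attn_norm_2_w && l.attn_norm_2_b) {
        for (size_t i = 0; i < cur.size(); ++i) cur[i] += inp_layer[i]; // re-add the layer input
        cur = layer_norm(cur, *l.attn_norm_2_w, *l.attn_norm_2_b);
    }
    return cur;
}

int main() {
    const size_t n_embd = 4;
    Layer l;
    l.attn_out_norm_w = Vec(n_embd, 1.0f);
    l.attn_out_norm_b = Vec(n_embd, 0.0f);
    // Uncomment to simulate a checkpoint that ships the extra norm:
    // l.attn_norm_2_w = Vec(n_embd, 1.0f);
    // l.attn_norm_2_b = Vec(n_embd, 0.0f);

    Vec inp = {0.1f, 0.2f, 0.3f, 0.4f};
    Vec cur = {1.0f, -1.0f, 2.0f, -2.0f};
    for (float v : post_attention(l, cur, inp)) std::printf("%f ", v);
    std::printf("\n");
}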