
Commit d1940a3

update format
1 parent bd72ba0 commit d1940a3

2 files changed: +7, -27 lines


convert-baichuan-hf-to-gguf.py (+3, -16)
@@ -120,6 +120,8 @@ def parse_args() -> argparse.Namespace:
     ctx_length = hparams["max_sequence_length"]
 elif "max_position_embeddings" in hparams:
     ctx_length = hparams["max_position_embeddings"]
+elif "model_max_length" in hparams:
+    ctx_length = hparams["model_max_length"]
 else:
     print("gguf: can not find ctx length parameter.")
 
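For context: Baichuan-style configs are inconsistent about which key records the context window, and this hunk adds model_max_length as a further fallback. A minimal runnable sketch of the lookup chain, assuming hparams came from the model's config.json (the sample value below is made up):

hparams = {"model_max_length": 4096}  # toy config lacking the other two keys

if "max_sequence_length" in hparams:
    ctx_length = hparams["max_sequence_length"]
elif "max_position_embeddings" in hparams:
    ctx_length = hparams["max_position_embeddings"]
elif "model_max_length" in hparams:  # the fallback this commit adds
    ctx_length = hparams["model_max_length"]
else:
    raise SystemExit("gguf: can not find ctx length parameter.")

print("ctx_length =", ctx_length)  # -> ctx_length = 4096
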
@@ -231,12 +233,7 @@ def parse_args() -> argparse.Namespace:
 
     tmp=model_part
     for i in itertools.count():
-        if f"model.layers.{i}.self_attn.q_proj.weight" in model_part:
-            print(f"Permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.q_proj.weight"], head_count, head_count)
-            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.k_proj.weight"], head_count, head_count_kv)
-            #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-        elif f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
+        if f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
             print(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],0,head_count,head_count)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],1,head_count,head_count_kv)
@@ -259,14 +256,6 @@ def parse_args() -> argparse.Namespace:
 
     data = data.squeeze().numpy()
 
-    # reverse permute these
-    # if name.endswith(".q_proj.weight"):
-    #     data = reverse_hf_permute(data, head_count)
-    # if name.endswith(".k_proj.weight"):
-    #     data = reverse_hf_permute(data, head_count, head_count_kv)
-
-
-
     # map tensor names
     new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
     if new_name is None:
@@ -289,8 +278,6 @@ def parse_args() -> argparse.Namespace:
         data = data.astype(np.float16)
 
     print(name + " -> " + new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-
     gguf_writer.add_tensor(new_name, data)
 
 
llama.cpp (+4, -11)
@@ -1948,7 +1948,6 @@ static void llm_load_tensors(
     const int64_t n_vocab = hparams.n_vocab;
 
     const auto tn = LLM_TN(model.arch);
-
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
             {
@@ -2777,13 +2776,11 @@ static struct ggml_cgraph * llm_build_baichaun(
 
                 struct ggml_tensor * Kcur;
                 struct ggml_tensor * Qcur;
-                switch (model.type)
-                {
+                switch (model.type) {
                     case MODEL_7B:
                         Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
-                        Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
+                        Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
                         break;
-
                     case MODEL_13B:
                         Kcur = ggml_reshape_3d(ctx0, tmpk, n_embd/n_head, n_head, N);
                         Qcur = ggml_reshape_3d(ctx0, tmpq, n_embd/n_head, n_head, N);
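
The switch reflects the two position schemes in the Baichuan family: the 7B model rotates Q and K with RoPE, while the 13B model only reshapes them here and instead gets an ALiBi bias on the attention scores later in the graph (see the last hunk below). A schematic NumPy sketch of the rotation for a single head vector, assuming interleaved even/odd pairs; ggml's exact layout and its freq_base/freq_scale handling may differ:

import numpy as np

def rope(x, pos, freq_base=10000.0, freq_scale=1.0):
    # x: one head's vector of length n_embd_head at sequence position pos.
    d = x.shape[0]
    out = x.copy()
    for i in range(0, d, 2):
        # each (even, odd) pair is rotated by a frequency that falls off
        # with the dimension index, scaled by the token position
        theta = freq_scale * pos * freq_base ** (-i / d)
        c, s = np.cos(theta), np.sin(theta)
        out[i]     = x[i] * c - x[i + 1] * s
        out[i + 1] = x[i] * s + x[i + 1] * c
    return out

q = rope(np.random.randn(128), pos=5)  # n_embd_head = 128, token 5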
@@ -2797,8 +2794,6 @@ static struct ggml_cgraph * llm_build_baichaun(
 
                 offload_func_kq(Qcur);
                 ggml_set_name(Qcur, "Qcur");
-
-
 
                 // store key and value to memory
                 {
@@ -2853,13 +2848,11 @@ static struct ggml_cgraph * llm_build_baichaun(
 
                 struct ggml_tensor * KQ_masked;
                 struct ggml_tensor * KQ_scaled_alibi;
-                // if model.type == MODEL_13B,here add kq_scaled_alibi
-                switch (model.type)
-                {
+
+                switch (model.type) {
                     case MODEL_7B:
                         KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
                         break;
-
                     case MODEL_13B:
                         KQ_scaled_alibi =ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8);
                         ggml_set_name(KQ_scaled_alibi, "KQ_scaled_alibi");
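
On the 13B path, ggml_alibi replaces the rotary step: it adds a per-head linear bias to the scaled KQ logits, with 8 passed as the maximum bias, which is what the removed comment was pointing at. A minimal NumPy sketch of that bias, assuming the standard geometric head slopes and a power-of-two head count (a simplification; ggml also handles the non-power-of-two case):

import numpy as np

def alibi_bias(n_head, n_kv, max_bias=8.0):
    # Head slopes form a geometric series; with max_bias = 8 this is the
    # classic ALiBi schedule m_h = 2^(-8 * h / n_head).
    m0 = 2.0 ** (-max_bias / n_head)
    slopes = m0 ** np.arange(1, n_head + 1)            # (n_head,)
    # Bias grows linearly with key position; since softmax is shift-
    # invariant per row, slope * j works as well as slope * (j - i).
    pos = np.arange(n_kv, dtype=np.float32)            # (n_kv,)
    return slopes[:, None, None] * pos[None, None, :]  # (n_head, 1, n_kv)

# usage: KQ_scaled has shape (n_head, n_seq, n_kv)
# KQ_scaled_alibi = KQ_scaled + alibi_bias(n_head, n_kv)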
