@@ -120,6 +120,8 @@ def parse_args() -> argparse.Namespace:
120
120
ctx_length = hparams ["max_sequence_length" ]
121
121
elif "max_position_embeddings" in hparams :
122
122
ctx_length = hparams ["max_position_embeddings" ]
123
+ elif "model_max_length" in hparams :
124
+ ctx_length = hparams ["model_max_length" ]
123
125
else :
124
126
print ("gguf: can not find ctx length parameter." )
125
127
@@ -231,12 +233,7 @@ def parse_args() -> argparse.Namespace:
231
233
232
234
tmp = model_part
233
235
for i in itertools .count ():
234
- if f"model.layers.{ i } .self_attn.q_proj.weight" in model_part :
235
- print (f"Permuting layer { i } " )
236
- tmp [f"model.layers.{ i } .self_attn.q_proj.weight" ] = reverse_hf_permute (model_part [f"model.layers.{ i } .self_attn.q_proj.weight" ], head_count , head_count )
237
- tmp [f"model.layers.{ i } .self_attn.k_proj.weight" ] = reverse_hf_permute (model_part [f"model.layers.{ i } .self_attn.k_proj.weight" ], head_count , head_count_kv )
238
- #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
239
- elif f"model.layers.{ i } .self_attn.W_pack.weight" in model_part :
236
+ if f"model.layers.{ i } .self_attn.W_pack.weight" in model_part :
240
237
print (f"Unpacking and permuting layer { i } " )
241
238
tmp [f"model.layers.{ i } .self_attn.q_proj.weight" ]= reverse_hf_permute_part (model_part [f"model.layers.{ i } .self_attn.W_pack.weight" ],0 ,head_count ,head_count )
242
239
tmp [f"model.layers.{ i } .self_attn.k_proj.weight" ]= reverse_hf_permute_part (model_part [f"model.layers.{ i } .self_attn.W_pack.weight" ],1 ,head_count ,head_count_kv )
@@ -259,14 +256,6 @@ def parse_args() -> argparse.Namespace:
259
256
260
257
data = data .squeeze ().numpy ()
261
258
262
- # reverse permute these
263
- # if name.endswith(".q_proj.weight"):
264
- # data = reverse_hf_permute(data, head_count)
265
- # if name.endswith(".k_proj.weight"):
266
- # data = reverse_hf_permute(data, head_count, head_count_kv)
267
-
268
-
269
-
270
259
# map tensor names
271
260
new_name = tensor_map .get_name (name , try_suffixes = (".weight" , ".bias" ))
272
261
if new_name is None :
@@ -290,7 +279,6 @@ def parse_args() -> argparse.Namespace:
290
279
291
280
print (name + " -> " + new_name + ", n_dims = " + str (n_dims ) + ", " + str (old_dtype ) + " --> " + str (data .dtype ))
292
281
293
-
294
282
gguf_writer .add_tensor (new_name , data )
295
283
296
284
0 commit comments