@@ -151,8 +151,11 @@ class Params:
     n_head_kv:  int
     f_norm_eps: float
 
+    rope_scaling_type: gguf.RopeScalingType | None = None
     f_rope_freq_base: float | None = None
     f_rope_scale: float | None = None
+    n_orig_ctx: int | None = None
+    rope_finetuned: bool | None = None
 
     ftype: GGMLFileType | None = None
 
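For context, a minimal sketch of how the extended dataclass behaves: the new rope-scaling fields all default to None, so existing call sites that never pass them keep working. The stub below only mirrors the fields visible in this hunk and is not the real convert.py class; RopeScalingType stands in for gguf.RopeScalingType.

from __future__ import annotations
from dataclasses import dataclass
from enum import Enum

class RopeScalingType(Enum):   # stand-in for gguf.RopeScalingType
    LINEAR = "linear"
    YARN = "yarn"

@dataclass
class ParamsSketch:            # illustrative stub, not the real Params
    n_head_kv:  int
    f_norm_eps: float
    rope_scaling_type: RopeScalingType | None = None
    f_rope_freq_base: float | None = None
    f_rope_scale: float | None = None
    n_orig_ctx: int | None = None
    rope_finetuned: bool | None = None

# Constructing with only the required fields leaves the new metadata unset.
p = ParamsSketch(n_head_kv=8, f_norm_eps=1e-5)
assert p.rope_scaling_type is None and p.n_orig_ctx is None and p.rope_finetuned is None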
@@ -198,20 +201,20 @@ def guessed(model: LazyModel) -> Params:
     def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))
 
-        n_vocab          = config["vocab_size"]
-        n_embd           = config["hidden_size"]
-        n_layer          = config["num_hidden_layers"]
-        n_ff             = config["intermediate_size"]
-        n_head           = config["num_attention_heads"]
-        n_head_kv        = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
-        f_norm_eps       = config["rms_norm_eps"]
-        f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
-
+        rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
         rope_scaling = config.get("rope_scaling")
-        if isinstance(rope_scaling, dict) and rope_scaling.get("type") == "linear":
-            f_rope_scale = config["rope_scaling"].get("factor")
-        else:
-            f_rope_scale = None
+
+        if rope_scaling is not None and (typ := rope_scaling.get("type")):
+            rope_factor = rope_scaling.get("factor")
+            f_rope_scale = rope_factor
+            if typ == "linear":
+                rope_scaling_type = gguf.RopeScalingType.LINEAR
+            elif typ == "yarn":
+                rope_scaling_type = gguf.RopeScalingType.YARN
+                n_orig_ctx = rope_scaling['original_max_position_embeddings']
+                rope_finetuned = rope_scaling['finetuned']
+            else:
+                raise NotImplementedError(f'Unknown rope scaling type: {typ}')
 
         if "max_sequence_length" in config:
             n_ctx = config["max_sequence_length"]
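As a sanity check, here is a self-contained sketch of the branch logic above applied to a hypothetical YaRN rope_scaling block from a Hugging Face config.json. The sample numbers and the local RopeScalingType stand-in are illustrative only; the key names ("type", "factor", "original_max_position_embeddings", "finetuned") are the ones read by the hunk above.

from enum import Enum

class RopeScalingType(Enum):   # stand-in for gguf.RopeScalingType
    LINEAR = "linear"
    YARN = "yarn"

# Hypothetical fragment of a config.json for a YaRN-finetuned model.
config = {
    "rope_scaling": {
        "type": "yarn",
        "factor": 4.0,
        "original_max_position_embeddings": 4096,
        "finetuned": True,
    }
}

rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
rope_scaling = config.get("rope_scaling")

if rope_scaling is not None and (typ := rope_scaling.get("type")):
    f_rope_scale = rope_scaling.get("factor")
    if typ == "linear":
        rope_scaling_type = RopeScalingType.LINEAR
    elif typ == "yarn":
        rope_scaling_type = RopeScalingType.YARN
        n_orig_ctx = rope_scaling["original_max_position_embeddings"]
        rope_finetuned = rope_scaling["finetuned"]
    else:
        raise NotImplementedError(f"Unknown rope scaling type: {typ}")

assert rope_scaling_type is RopeScalingType.YARN
assert (f_rope_scale, n_orig_ctx, rope_finetuned) == (4.0, 4096, True)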
@@ -222,16 +225,19 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
 
         return Params(
-            n_vocab          = n_vocab,
-            n_embd           = n_embd,
-            n_layer          = n_layer,
-            n_ctx            = n_ctx,
-            n_ff             = n_ff,
-            n_head           = n_head,
-            n_head_kv        = n_head_kv,
-            f_norm_eps       = f_norm_eps,
-            f_rope_freq_base = f_rope_freq_base,
-            f_rope_scale     = f_rope_scale,
+            n_vocab           = config["vocab_size"],
+            n_embd            = config["hidden_size"],
+            n_layer           = config["num_hidden_layers"],
+            n_ctx             = n_ctx,
+            n_ff              = config["intermediate_size"],
+            n_head            = (n_head := config["num_attention_heads"]),
+            n_head_kv         = config.get("num_key_value_heads", n_head),
+            f_norm_eps        = config["rms_norm_eps"],
+            f_rope_freq_base  = config.get("rope_theta"),
+            rope_scaling_type = rope_scaling_type,
+            f_rope_scale      = f_rope_scale,
+            n_orig_ctx        = n_orig_ctx,
+            rope_finetuned    = rope_finetuned,
         )
 
     # LLaMA v2 70B params.json
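One detail worth noting in the Params(...) call above is the walrus assignment inside a keyword argument: call arguments are evaluated left to right, so n_head bound in the n_head= argument is already available when the n_head_kv= fallback runs. A tiny illustration with a hypothetical helper:

# Hypothetical two-argument helper, used only to demonstrate evaluation order.
def pair(a, b):
    return a, b

cfg = {"num_attention_heads": 32}   # no "num_key_value_heads" key, as in non-GQA configs
result = pair(
    a=(n_head := cfg["num_attention_heads"]),   # walrus binds n_head here...
    b=cfg.get("num_key_value_heads", n_head),   # ...and the fallback reuses it here
)
assert result == (32, 32)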
@@ -240,17 +246,8 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
     def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))
 
-        n_vocab          = config["vocab_size"] if "vocab_size" in config else -1
-        n_embd           = config["dim"]
-        n_layer          = config["n_layers"]
-        n_ff             = -1
-        n_head           = config["n_heads"]
-        n_head_kv        = config["n_kv_heads"] if "n_kv_heads" in config else n_head
-        f_norm_eps       = config["norm_eps"]
-        f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
-
         # hack to determine LLaMA v1 vs v2 vs CodeLlama
-        if f_rope_freq_base == 1000000:
+        if config.get("rope_theta") == 1000000:
             # CodeLlama
             n_ctx = 16384
         elif config["norm_eps"] == 1e-05:
@@ -260,22 +257,16 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             # LLaMA v1
             n_ctx = 2048
 
-        if n_vocab == -1:
-            n_vocab = model["tok_embeddings.weight"].shape[0]
-
-        if n_ff == -1:
-            n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
-
         return Params(
-            n_vocab          = n_vocab,
-            n_embd           = n_embd,
-            n_layer          = n_layer,
+            n_vocab          = config.get("vocab_size", model["tok_embeddings.weight"].shape[0]),
+            n_embd           = config["dim"],
+            n_layer          = config["n_layers"],
             n_ctx            = n_ctx,
-            n_ff             = n_ff,
-            n_head           = n_head,
-            n_head_kv        = n_head_kv,
-            f_norm_eps       = f_norm_eps,
-            f_rope_freq_base = f_rope_freq_base,
+            n_ff             = model["layers.0.feed_forward.w1.weight"].shape[0],
+            n_head           = (n_head := config["n_heads"]),
+            n_head_kv        = config.get("n_kv_heads", n_head),
+            f_norm_eps       = config["norm_eps"],
+            f_rope_freq_base = config.get("rope_theta"),
         )
 
     @staticmethod
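For the params.json path, a hedged sketch of the two behaviours changed above: vocab and FFN sizes now fall back to tensor shapes via plain dict lookups instead of -1 sentinels, and CodeLlama detection reads rope_theta straight from the config. The TensorStub class and the sample values are made up for illustration, and the norm_eps branch for picking the middle context length is omitted for brevity.

from dataclasses import dataclass

@dataclass
class TensorStub:      # minimal stand-in for a lazy tensor exposing .shape
    shape: tuple

# Hypothetical params.json contents and model tensors (illustrative values).
config = {"dim": 4096, "n_layers": 32, "n_heads": 32, "norm_eps": 1e-6, "rope_theta": 1000000}
model = {
    "tok_embeddings.weight":           TensorStub((32016, 4096)),
    "layers.0.feed_forward.w1.weight": TensorStub((11008, 4096)),
}

# vocab_size is absent, so the embedding matrix decides n_vocab; n_ff now always
# comes from the first feed-forward weight.
n_vocab = config.get("vocab_size", model["tok_embeddings.weight"].shape[0])
n_ff    = model["layers.0.feed_forward.w1.weight"].shape[0]

# rope_theta == 1e6 is the CodeLlama signal, checked directly on the config dict.
n_ctx = 16384 if config.get("rope_theta") == 1000000 else 2048

assert (n_vocab, n_ff, n_ctx) == (32016, 11008, 16384)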
@@ -831,8 +822,16 @@ def add_meta_arch(self, params: Params) -> None:
         if params.f_rope_freq_base is not None:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)
 
-        if params.f_rope_scale is not None:
-            self.gguf.add_rope_scale_linear(params.f_rope_scale)
+        if params.rope_scaling_type:
+            assert params.f_rope_scale is not None
+            self.gguf.add_rope_scaling_type(params.rope_scaling_type)
+            self.gguf.add_rope_scaling_factor(params.f_rope_scale)
+
+        if params.n_orig_ctx is not None:
+            self.gguf.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)
+
+        if params.rope_finetuned is not None:
+            self.gguf.add_rope_scaling_finetuned(params.rope_finetuned)
 
         if params.ftype is not None:
             self.gguf.add_file_type(params.ftype)
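Finally, a sketch of which writer calls the new guards produce, using a small call recorder in place of the real gguf writer. The recorder and the SimpleNamespace stand-ins for Params are hypothetical; the method names are exactly the ones added in this hunk.

from types import SimpleNamespace

class RecordingWriter:
    """Hypothetical stand-in for the gguf writer: records (method, args) pairs."""
    def __init__(self):
        self.calls = []
    def __getattr__(self, name):
        return lambda *args: self.calls.append((name, args))

def write_rope_meta(gguf_writer, params):
    # Mirrors the guards above: the factor is only written when a scaling type was detected.
    if params.rope_scaling_type:
        assert params.f_rope_scale is not None
        gguf_writer.add_rope_scaling_type(params.rope_scaling_type)
        gguf_writer.add_rope_scaling_factor(params.f_rope_scale)
    if params.n_orig_ctx is not None:
        gguf_writer.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)
    if params.rope_finetuned is not None:
        gguf_writer.add_rope_scaling_finetuned(params.rope_finetuned)

# A YaRN-scaled model triggers all four writer calls; an unscaled one triggers none.
yarn = SimpleNamespace(rope_scaling_type="yarn", f_rope_scale=4.0, n_orig_ctx=4096, rope_finetuned=True)
plain = SimpleNamespace(rope_scaling_type=None, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None)

w = RecordingWriter()
write_rope_meta(w, yarn)
assert [name for name, _ in w.calls] == [
    "add_rope_scaling_type", "add_rope_scaling_factor",
    "add_rope_scaling_orig_ctx_len", "add_rope_scaling_finetuned",
]

w2 = RecordingWriter()
write_rope_meta(w2, plain)
assert w2.calls == []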