@@ -773,6 +773,148 @@ def _reverse_hf_part(self, weights: Tensor, n_part: int) -> Tensor:
        return weights[r * n_part:r * n_part + r, ...]


+@Model.register("XverseForCausalLM")
+class XverseModel(Model):
+    model_arch = gguf.MODEL_ARCH.XVERSE
+
+    def set_vocab(self):
+        assert (self.dir_model / "tokenizer.json").is_file()
+        dir_model = self.dir_model
+        hparams = self.hparams
+
+        tokens: list[bytearray] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(dir_model)
+        vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
+        assert max(tokenizer.vocab.values()) < vocab_size
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
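+        # classify each token: NUL bytes and <0xXX> byte tokens become BYTE,
+        # added tokens become CONTROL or USER_DEFINED, everything else NORMAL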
+        for token_id in range(vocab_size):
+            token_text = reverse_vocab[token_id].encode('utf-8')
+            # replace "\x00" with a string of length > 0
+            if token_text == b"\x00":
+                toktype = gguf.TokenType.BYTE  # special
+                token_text = f"<{token_text}>".encode('utf-8')
+            elif re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
+                toktype = gguf.TokenType.BYTE  # special
+            elif reverse_vocab[token_id] in added_vocab:
+                if tokenizer.added_tokens_decoder[token_id].special:
+                    toktype = gguf.TokenType.CONTROL
+                else:
+                    toktype = gguf.TokenType.USER_DEFINED
+            else:
+                toktype = gguf.TokenType.NORMAL
+
+            tokens.append(token_text)
+            toktypes.append(toktype)
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        block_count = self.hparams["num_hidden_layers"]
+        head_count = self.hparams["num_attention_heads"]
+        head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+        hf_repo = self.hparams.get("_name_or_path", "")
+
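+        # HF configs name the context length differently; try the known keys in order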
+        ctx_length = 0
+        if "max_sequence_length" in self.hparams:
+            ctx_length = self.hparams["max_sequence_length"]
+        elif "max_position_embeddings" in self.hparams:
+            ctx_length = self.hparams["max_position_embeddings"]
+        elif "model_max_length" in self.hparams:
+            ctx_length = self.hparams["model_max_length"]
+        else:
+            print("gguf: can not find ctx length parameter.")
+            sys.exit()
+
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_source_hf_repo(hf_repo)
+        self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
+        self.gguf_writer.add_context_length(ctx_length)
+        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count(head_count)
+        self.gguf_writer.add_head_count_kv(head_count_kv)
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+
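+        # propagate linear RoPE scaling from the HF config, if present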
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "linear":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+
+    def write_tensors(self):
+        # Collect tensors from generator object
+        model_kv = dict(self.get_tensors())
+        block_count = self.hparams["num_hidden_layers"]
+        head_count = self.hparams["num_attention_heads"]
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+
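+        # convert, rename and (optionally) down-cast every tensor before writing it to GGUF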
+        for name, data_torch in model_kv.items():
+            # we don't need these
+            if name.endswith(".rotary_emb.inv_freq"):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            # HF models permute some of the tensors, so we need to undo that
+            if name.endswith("q_proj.weight"):
+                data_torch = self._reverse_hf_permute(data_torch, head_count, head_count)
+            if name.endswith("k_proj.weight"):
+                data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
+
+            data = data_torch.squeeze().numpy()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+            self.gguf_writer.add_tensor(new_name, data)
+
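+    # undo the HF Q/K weight permutation: regroup the rows per attention head and
+    # re-interleave the two rotary half-blocks, restoring the original row order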
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+
@Model.register("FalconForCausalLM", "RWForCausalLM")
class FalconModel(Model):
    model_arch = gguf.MODEL_ARCH.FALCON