@@ -86,73 +86,73 @@ class LLMModelConfig:
             "gpt-4o": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=16384,
-                tokenizer_hub="Xenova/gpt-4o",
+                tokenizer_hub="Quivr/gpt-4o",
             ),
             "gpt-4o-mini": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=16384,
-                tokenizer_hub="Xenova/gpt-4o",
+                tokenizer_hub="Quivr/gpt-4o",
             ),
             "gpt-4-turbo": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/gpt-4",
+                tokenizer_hub="Quivr/gpt-4",
             ),
             "gpt-4": LLMConfig(
                 max_context_tokens=8192,
                 max_output_tokens=8192,
-                tokenizer_hub="Xenova/gpt-4",
+                tokenizer_hub="Quivr/gpt-4",
             ),
             "gpt-3.5-turbo": LLMConfig(
                 max_context_tokens=16385,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/gpt-3.5-turbo",
+                tokenizer_hub="Quivr/gpt-3.5-turbo",
             ),
             "text-embedding-3-large": LLMConfig(
-                max_context_tokens=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
+                max_context_tokens=8191, tokenizer_hub="Quivr/text-embedding-ada-002"
             ),
             "text-embedding-3-small": LLMConfig(
-                max_context_tokens=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
+                max_context_tokens=8191, tokenizer_hub="Quivr/text-embedding-ada-002"
             ),
             "text-embedding-ada-002": LLMConfig(
-                max_context_tokens=8191, tokenizer_hub="Xenova/text-embedding-ada-002"
+                max_context_tokens=8191, tokenizer_hub="Quivr/text-embedding-ada-002"
             ),
         },
         DefaultModelSuppliers.ANTHROPIC: {
             "claude-3-5-sonnet": LLMConfig(
                 max_context_tokens=200000,
                 max_output_tokens=8192,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
             "claude-3-opus": LLMConfig(
                 max_context_tokens=200000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
             "claude-3-sonnet": LLMConfig(
                 max_context_tokens=200000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
             "claude-3-haiku": LLMConfig(
                 max_context_tokens=200000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
             "claude-2-1": LLMConfig(
                 max_context_tokens=200000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
             "claude-2-0": LLMConfig(
                 max_context_tokens=100000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
             "claude-instant-1-2": LLMConfig(
                 max_context_tokens=100000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/claude-tokenizer",
+                tokenizer_hub="Quivr/claude-tokenizer",
             ),
         },
         # Unclear for LLAMA models...
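Every tokenizer_hub value here is a Hugging Face Hub repo id, so this hunk simply repoints the lookups from the Xenova organization to Quivr-hosted copies. A minimal sketch of how such an entry can be consumed, assuming the standard transformers API (the helper function itself is illustrative, not part of this diff):

from transformers import AutoTokenizer

def count_tokens(text: str, tokenizer_hub: str) -> int:
    # e.g. tokenizer_hub="Quivr/gpt-4o" after this change
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_hub)
    return len(tokenizer.encode(text))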
@@ -161,53 +161,53 @@ class LLMModelConfig:
             "llama-3.1": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer",
+                tokenizer_hub="Quivr/Meta-Llama-3.1-Tokenizer",
             ),
             "llama-3": LLMConfig(
                 max_context_tokens=8192,
                 max_output_tokens=2048,
-                tokenizer_hub="Xenova/llama3-tokenizer-new",
+                tokenizer_hub="Quivr/llama3-tokenizer-new",
             ),
             "code-llama": LLMConfig(
-                max_context_tokens=16384, tokenizer_hub="Xenova/llama-code-tokenizer"
+                max_context_tokens=16384, tokenizer_hub="Quivr/llama-code-tokenizer"
             ),
         },
         DefaultModelSuppliers.GROQ: {
             "llama-3.3-70b": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=32768,
-                tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer",
+                tokenizer_hub="Quivr/Meta-Llama-3.1-Tokenizer",
             ),
             "llama-3.1-70b": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=32768,
-                tokenizer_hub="Xenova/Meta-Llama-3.1-Tokenizer",
+                tokenizer_hub="Quivr/Meta-Llama-3.1-Tokenizer",
             ),
             "llama-3": LLMConfig(
-                max_context_tokens=8192, tokenizer_hub="Xenova/llama3-tokenizer-new"
+                max_context_tokens=8192, tokenizer_hub="Quivr/llama3-tokenizer-new"
             ),
             "code-llama": LLMConfig(
-                max_context_tokens=16384, tokenizer_hub="Xenova/llama-code-tokenizer"
+                max_context_tokens=16384, tokenizer_hub="Quivr/llama-code-tokenizer"
             ),
         },
         DefaultModelSuppliers.MISTRAL: {
             "mistral-large": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/mistral-tokenizer-v3",
+                tokenizer_hub="Quivr/mistral-tokenizer-v3",
             ),
             "mistral-small": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/mistral-tokenizer-v3",
+                tokenizer_hub="Quivr/mistral-tokenizer-v3",
             ),
             "mistral-nemo": LLMConfig(
                 max_context_tokens=128000,
                 max_output_tokens=4096,
-                tokenizer_hub="Xenova/Mistral-Nemo-Instruct-Tokenizer",
+                tokenizer_hub="Quivr/Mistral-Nemo-Instruct-Tokenizer",
             ),
             "codestral": LLMConfig(
-                max_context_tokens=32000, tokenizer_hub="Xenova/mistral-tokenizer-v3"
+                max_context_tokens=32000, tokenizer_hub="Quivr/mistral-tokenizer-v3"
             ),
         },
     }
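As with the OpenAI and Anthropic entries, only the Hub organization changes in this hunk. Note that several entries (the one-line "llama-3", "code-llama", and "codestral" configs) set only max_context_tokens; assuming max_output_tokens defaults to None on LLMConfig, a consumer would guard for that, e.g. with a hypothetical helper like:

def fits_context(prompt_tokens: int, config: "LLMConfig") -> bool:
    # Reserve room for the output budget when one is configured;
    # treat an unset max_output_tokens as zero reservation.
    budget = config.max_context_tokens - (config.max_output_tokens or 0)
    return prompt_tokens <= budget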
@@ -247,9 +247,9 @@ class LLMEndpointConfig(QuivrBaseConfig):
     llm_base_url: str | None = None
     env_variable_name: str | None = None
     llm_api_key: str | None = None
-    max_context_tokens: int = 10000
-    max_output_tokens: int = 4000
-    temperature: float = 0.7
+    max_context_tokens: int = 20000
+    max_output_tokens: int = 4096
+    temperature: float = 0.3
     streaming: bool = True
     prompt: CustomPromptsModel | None = None
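The second hunk retunes the LLMEndpointConfig defaults: a doubled context budget, an output cap aligned with the common 4096-token limit, and a lower temperature for more deterministic answers. Assuming QuivrBaseConfig behaves like a pydantic BaseModel, the new defaults can be checked directly (values taken from the diff above):

config = LLMEndpointConfig()
assert config.max_context_tokens == 20000
assert config.max_output_tokens == 4096
assert config.temperature == 0.3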