Commit 2fbd5d4

feat: remove pympler dependency and add better way to calculate size of tokenizer cache (#3580)
1 parent e2a3bcb · commit 2fbd5d4

4 files changed: +26 -13 lines changed


core/pyproject.toml (-1)

@@ -25,7 +25,6 @@ dependencies = [
     "langchain-mistralai>=0.2.3",
     "fasttext-langdetect>=1.0.5",
     "langfuse>=2.57.0",
-    "pympler>=1.1",
 ]
 readme = "README.md"
 requires-python = ">= 3.11"

core/quivr_core/llm/llm_endpoint.py (+26 -8)

@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Union, Any
+from typing import Union
 from urllib.parse import parse_qs, urlparse

 import tiktoken
@@ -10,7 +10,6 @@
 from langchain_openai import AzureChatOpenAI, ChatOpenAI
 from pydantic import SecretStr
 import time
-from pympler import asizeof

 from quivr_core.brain.info import LLMInfo
 from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig
@@ -19,17 +18,14 @@
 logger = logging.getLogger("quivr_core")


-def get_size(obj: Any, seen: set | None = None) -> int:
-    return asizeof.asizeof(obj)
-
-
 class LLMTokenizer:
     _cache: dict[
         int, tuple["LLMTokenizer", int, float]
     ] = {}  # {hash: (tokenizer, size_bytes, last_access_time)}
     _max_cache_size_mb: int = 50
-    _max_cache_count: int = 3  # Default maximum number of cached tokenizers
+    _max_cache_count: int = 5  # Default maximum number of cached tokenizers
     _current_cache_size: int = 0
+    _default_size: int = 5 * 1024 * 1024

     def __init__(self, tokenizer_hub: str | None, fallback_tokenizer: str):
         self.tokenizer_hub = tokenizer_hub
@@ -63,7 +59,29 @@ def __init__(self, tokenizer_hub: str | None, fallback_tokenizer: str):
             self.tokenizer = tiktoken.get_encoding(self.fallback_tokenizer)

         # More accurate size estimation
-        self._size_bytes = get_size(self.tokenizer)
+        self._size_bytes = self._calculate_tokenizer_size()
+
+    def _calculate_tokenizer_size(self) -> int:
+        """Calculate size of tokenizer by summing the sizes of its vocabulary and model files"""
+        # By default, return a size of 5 MB
+        if not hasattr(self.tokenizer, "vocab_files_names") or not hasattr(
+            self.tokenizer, "init_kwargs"
+        ):
+            return self._default_size
+
+        total_size = 0
+
+        # Get the file keys from vocab_files_names
+        file_keys = self.tokenizer.vocab_files_names.keys()
+        # Look up these files in init_kwargs
+        for key in file_keys:
+            if file_path := self.tokenizer.init_kwargs.get(key):
+                try:
+                    total_size += os.path.getsize(file_path)
+                except (OSError, FileNotFoundError):
+                    logger.debug(f"Could not access tokenizer file: {file_path}")
+
+        return total_size if total_size > 0 else self._default_size

     @classmethod
     def load(cls, tokenizer_hub: str, fallback_tokenizer: str):
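
For reference outside the diff, the same size-estimation idea can be expressed as a standalone helper. This is a minimal sketch, assuming a Hugging Face style tokenizer object that exposes `vocab_files_names` and `init_kwargs` (the attributes the new `_calculate_tokenizer_size` method relies on); the `estimate_tokenizer_size` name and the logger setup are illustrative and not part of the commit.

```python
import logging
import os

logger = logging.getLogger("quivr_core")

# Fallback mirrors the new `_default_size` class attribute: 5 MB.
DEFAULT_SIZE = 5 * 1024 * 1024


def estimate_tokenizer_size(tokenizer) -> int:
    """Sum the on-disk sizes of the tokenizer's vocabulary/model files."""
    # Tokenizers without the expected attributes get the 5 MB default.
    if not hasattr(tokenizer, "vocab_files_names") or not hasattr(tokenizer, "init_kwargs"):
        return DEFAULT_SIZE

    total_size = 0
    # `vocab_files_names` maps argument names (e.g. "vocab_file") to file names;
    # `init_kwargs` usually holds the resolved paths those arguments were given.
    for key in tokenizer.vocab_files_names:
        if file_path := tokenizer.init_kwargs.get(key):
            try:
                total_size += os.path.getsize(file_path)
            except OSError:  # FileNotFoundError is a subclass of OSError
                logger.debug(f"Could not access tokenizer file: {file_path}")

    return total_size if total_size > 0 else DEFAULT_SIZE
```

A hypothetical usage, assuming the `transformers` package and the "gpt2" tokenizer are available locally:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
# Expected to be roughly the combined size of vocab.json, merges.txt, etc.
print(estimate_tokenizer_size(tok))
```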

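The cache these sizes feed is declared in the diff as `{hash: (tokenizer, size_bytes, last_access_time)}` with a 50 MB budget (`_max_cache_size_mb`) and at most 5 entries (`_max_cache_count`), but the eviction logic itself is not shown in this commit. The sketch below is only an illustration of how such a size- and count-bounded cache could evict its least recently used entry; names such as `cache_put` and `_evict_if_needed` are invented for the example and do not appear in `llm_endpoint.py`.

```python
import time
from typing import Any

MAX_CACHE_SIZE_BYTES = 50 * 1024 * 1024  # mirrors _max_cache_size_mb = 50
MAX_CACHE_COUNT = 5                      # mirrors _max_cache_count = 5

# hash -> (tokenizer, size_bytes, last_access_time), as documented in the diff
_cache: dict[int, tuple[Any, int, float]] = {}
_current_cache_size = 0


def _evict_if_needed(incoming_size: int) -> None:
    """Evict least-recently-used entries until the new item fits both limits."""
    global _current_cache_size
    while _cache and (
        len(_cache) >= MAX_CACHE_COUNT
        or _current_cache_size + incoming_size > MAX_CACHE_SIZE_BYTES
    ):
        oldest_key = min(_cache, key=lambda k: _cache[k][2])  # smallest last_access_time
        _, evicted_size, _ = _cache.pop(oldest_key)
        _current_cache_size -= evicted_size


def cache_put(key: int, tokenizer: Any, size_bytes: int) -> None:
    """Insert a tokenizer, tracking its size and last access time."""
    global _current_cache_size
    _evict_if_needed(size_bytes)
    _cache[key] = (tokenizer, size_bytes, time.time())
    _current_cache_size += size_bytes
```
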
core/requirements-dev.lock (-2)

@@ -296,8 +296,6 @@ pyflakes==3.2.0
 pygments==2.18.0
     # via ipython
     # via rich
-pympler==1.1
-    # via quivr-core
 pytest==8.3.3
     # via pytest-asyncio
     # via pytest-benchmark

core/requirements.lock (-2)

@@ -214,8 +214,6 @@ pydantic-settings==2.6.1
     # via langchain-community
 pygments==2.18.0
     # via rich
-pympler==1.1
-    # via quivr-core
 python-dateutil==2.8.2
     # via pandas
 python-dotenv==1.0.1
