
Commit 6072907

feat: adding cache to LLMEndpoint (#3555)
This allows us to avoid repeating expensive operations, such as reloading the tokenizers, on each call. Closes ENT-394.
1 parent acb0bcb commit 6072907

1 file changed (+12 −1 lines)

core/quivr_core/llm/llm_endpoint.py (+12 −1)
@@ -18,6 +18,8 @@


 class LLMEndpoint:
+    _cache: dict[int, "LLMEndpoint"] = {}
+
     def __init__(self, llm_config: LLMEndpointConfig, llm: BaseChatModel):
         self._config = llm_config
         self._llm = llm
@@ -55,6 +57,13 @@ def get_config(self):

     @classmethod
     def from_config(cls, config: LLMEndpointConfig = LLMEndpointConfig()):
+        # Create a cache key from the config
+        cache_key = hash(str(config.model_dump()))
+
+        # Return cached instance if it exists
+        if cache_key in cls._cache:
+            return cls._cache[cache_key]
+
         _llm: Union[AzureChatOpenAI, ChatOpenAI, ChatAnthropic, ChatMistralAI]
         try:
             if config.supplier == DefaultModelSuppliers.AZURE:
@@ -112,7 +121,9 @@ def from_config(cls, config: LLMEndpointConfig = LLMEndpointConfig()):
                     max_tokens=config.max_output_tokens,
                     temperature=config.temperature,
                 )
-            return cls(llm=_llm, llm_config=config)
+            instance = cls(llm=_llm, llm_config=config)
+            cls._cache[cache_key] = instance
+            return instance

         except ImportError as e:
             raise ImportError(
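
The practical effect of this change is that two calls to from_config with an equivalent config now resolve to the same LLMEndpoint instance instead of rebuilding the underlying chat model (and reloading its tokenizer) each time. A minimal usage sketch follows; the import path and the specific config field values are illustrative assumptions, not part of the commit (the model and temperature fields do appear in the diff above).

    # Sketch only: assumes LLMEndpoint and LLMEndpointConfig are importable from this module.
    from quivr_core.llm.llm_endpoint import LLMEndpoint, LLMEndpointConfig

    # Two separate-but-equal configs produce the same str(config.model_dump()),
    # hence the same hash-based cache key.
    first = LLMEndpoint.from_config(LLMEndpointConfig(model="gpt-4o", temperature=0.7))
    second = LLMEndpoint.from_config(LLMEndpointConfig(model="gpt-4o", temperature=0.7))

    # The second call hits the class-level _cache and returns the stored instance.
    assert first is second

Note that the cache key is derived from the stringified model_dump of the config, so any differing field value (for example a different temperature) yields a distinct key and a fresh instance.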
