Skip to content

Update embedders settings, hybrid search, and add tests for AI search methods #1087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove validation to let meilisearch handle it
Strift committed Mar 26, 2025

Verified

This commit was signed with the committer’s verified signature.
euclio Andy Russell
commit 8a4369d4d41779f6d8f58007a45a94e4e587c203
19 changes: 2 additions & 17 deletions meilisearch/index.py
Original file line number Diff line number Diff line change
@@ -40,7 +40,6 @@
OpenAiEmbedder,
RestEmbedder,
UserProvidedEmbedder,
validate_embedders,
)
from meilisearch.models.task import Task, TaskInfo, TaskResults
from meilisearch.task import TaskHandler
@@ -1012,21 +1011,12 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo:

Raises
------
ValueError
If the provided embedder configuration is invalid.
MeilisearchApiError
An error containing details about why Meilisearch can't process your request.
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
# Create a copy of the body to avoid modifying the original
body_copy = body.copy()

# Validate embedders if present
if "embedders" in body_copy:
body_copy["embedders"] = validate_embedders(body_copy["embedders"])

task = self.http.patch(
f"{self.config.paths.index}/{self.uid}/{self.config.paths.setting}", body_copy
f"{self.config.paths.index}/{self.uid}/{self.config.paths.setting}", body
)

return TaskInfo(**task)
@@ -1986,16 +1976,11 @@ def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskI

Raises
------
ValueError
If the provided embedder configuration is invalid.
MeilisearchApiError
An error containing details about why Meilisearch can't process your request.
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
# Validate embedders
validated_body = validate_embedders(body) if body else None

task = self.http.patch(self.__settings_url_for(self.config.paths.embedders), validated_body)
task = self.http.patch(self.__settings_url_for(self.config.paths.embedders), body)

return TaskInfo(**task)

102 changes: 1 addition & 101 deletions meilisearch/models/embedders.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from __future__ import annotations

from typing import Any, Dict, List, Optional, Union, Mapping, MutableMapping, Type, TypeVar
from typing import Any, Dict, List, Optional, Union

from camel_converter.pydantic_base import CamelBase


T = TypeVar("T", bound="CamelBase")


class Distribution(CamelBase):
"""Distribution settings for embedders.

@@ -212,100 +209,3 @@ class Embedders(CamelBase):
OpenAiEmbedder, HuggingFaceEmbedder, OllamaEmbedder, RestEmbedder, UserProvidedEmbedder
],
]


def validate_embedder_config(embedder_name: str, config: Dict[str, Any]) -> Dict[str, Any]:
    """Check one embedder configuration and return a cleaned copy of it.

    The ``source`` key selects which embedder model the configuration is
    checked against. The caller's mapping is never modified; all clean-up
    happens on a copy.

    Parameters
    ----------
    embedder_name: str
        Name of the embedder; only used to build error messages.
    config: Dict[str, Any]
        Raw embedder configuration, keyed by the camelCase field names.

    Returns
    -------
    Dict[str, Any]
        A copy of ``config`` with unsupported / empty optional fields removed.

    Raises
    ------
    ValueError
        If ``source`` is missing or unknown, or if a field required by the
        selected source is absent.
    """
    kind = config.get("source")
    if kind not in ("openAi", "huggingFace", "ollama", "rest", "userProvided"):
        raise ValueError(
            f"Invalid source for embedder '{embedder_name}'. "
            "Must be one of: 'openAi', 'huggingFace', 'ollama', 'rest', 'userProvided'."
        )

    # Work on a copy so the caller's mapping is left untouched.
    result = config.copy()

    # Instantiating the model is the validation step; the instance itself is
    # deliberately discarded.
    if kind == "rest":
        if not ("request" in result and "response" in result):
            raise ValueError(
                f"Embedder '{embedder_name}' with source 'rest' must include 'request' and 'response' fields."
            )
        RestEmbedder(**result)
    elif kind == "userProvided":
        if "dimensions" not in result:
            raise ValueError(
                f"Embedder '{embedder_name}' with source 'userProvided' must include 'dimensions' field."
            )
        # Template-related fields are dropped for this source before the
        # model is built.
        for unsupported in ("documentTemplate", "documentTemplateMaxBytes"):
            result.pop(unsupported, None)
        UserProvidedEmbedder(**result)
    elif kind == "openAi":
        OpenAiEmbedder(**result)
    elif kind == "huggingFace":
        HuggingFaceEmbedder(**result)
    else:
        # Only "ollama" can reach this branch after the membership check above.
        OllamaEmbedder(**result)

    # Drop an explicit None for the optional size limit. The non-None default
    # makes the test true only when the key is present AND its value is None.
    if result.get("documentTemplateMaxBytes", False) is None:
        del result["documentTemplateMaxBytes"]

    return result


def validate_embedders(embedders: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    """Validate a dictionary of embedder configurations.

    Each non-null entry is checked with ``validate_embedder_config``. Entries
    whose value is ``None`` are passed through unchanged: in the Meilisearch
    settings API a null embedder is the way to remove that embedder, so it is
    a legitimate payload rather than a configuration to validate.

    Parameters
    ----------
    embedders: MutableMapping[str, Any]
        Dictionary mapping embedder names to their configuration (or to
        ``None`` to request removal of that embedder).

    Returns
    -------
    MutableMapping[str, Any]
        A new dictionary with the validated and cleaned configurations. A
        falsy input (``None`` or an empty mapping) is returned as-is.

    Raises
    ------
    ValueError
        If any non-null configuration is invalid.
    """
    if not embedders:
        return embedders

    return {
        # None means "remove this embedder"; there is nothing to validate and
        # calling the per-embedder validator on it would fail on `.get`.
        name: config if config is None else validate_embedder_config(name, config)
        for name, config in embedders.items()
    }