Commit 631ea38

Improved examples [skip ci]

1 parent c2e9d11

5 files changed: +17 -16

examples/bumblebee/example.exs (+2 -2)

@@ -16,7 +16,7 @@ model_id = "sentence-transformers/all-MiniLM-L6-v2"
 {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_id})

 defmodule Example do
-  def fetch_embeddings(model_info, tokenizer, input) do
+  def embed(model_info, tokenizer, input) do
     serving =
       Bumblebee.Text.text_embedding(model_info, tokenizer,
         output_attribute: :hidden_state,
@@ -34,7 +34,7 @@ input = [
   "The bear is growling"
 ]

-embeddings = Example.fetch_embeddings(model_info, tokenizer, input)
+embeddings = Example.embed(model_info, tokenizer, input)

 for {content, embedding} <- Enum.zip(input, embeddings) do
   Postgrex.query!(pid, "INSERT INTO documents (content, embedding) VALUES ($1, $2)", [
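The helper's body continues past what this hunk shows. A minimal sketch of how the renamed embed/3 might be completed with Nx.Serving, assuming mean pooling and L2 normalization (the options after output_attribute: and the module name below are assumptions, not part of the diff):

defmodule EmbedSketch do
  def embed(model_info, tokenizer, input) do
    serving =
      Bumblebee.Text.text_embedding(model_info, tokenizer,
        output_attribute: :hidden_state,
        output_pool: :mean_pooling,
        embedding_processor: :l2_norm
      )

    # One %{embedding: tensor} result comes back per input text;
    # convert each tensor to a plain list for insertion.
    for %{embedding: embedding} <- Nx.Serving.run(serving, input) do
      Nx.to_list(embedding)
    end
  end
end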

examples/cohere/example.exs (+3 -3)

@@ -12,7 +12,7 @@ Postgrex.query!(
 )

 defmodule Example do
-  def fetch_embeddings(texts, input_type) do
+  def embed(texts, input_type) do
     api_key = System.fetch_env!("CO_API_KEY")
     url = "https://api.cohere.com/v1/embed"

@@ -42,7 +42,7 @@ input = [
   "The bear is growling"
 ]

-embeddings = Example.fetch_embeddings(input, "search_document")
+embeddings = Example.embed(input, "search_document")

 for {content, embedding} <- Enum.zip(input, embeddings) do
   Postgrex.query!(pid, "INSERT INTO documents (content, embedding) VALUES ($1, $2)", [
@@ -52,7 +52,7 @@ for {content, embedding} <- Enum.zip(input, embeddings) do
 end

 query = "forest"
-query_embedding = Example.fetch_embeddings([query], "search_query") |> List.first()
+query_embedding = Example.embed([query], "search_query") |> List.first()

 result =
   Postgrex.query!(

examples/hybrid_search/example.exs (+3 -3)

@@ -18,7 +18,7 @@ model_id = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_id})

 defmodule Example do
-  def fetch_embeddings(model_info, tokenizer, input) do
+  def embed(model_info, tokenizer, input) do
     serving =
       Bumblebee.Text.text_embedding(model_info, tokenizer,
         output_attribute: :hidden_state,
@@ -36,7 +36,7 @@ input = [
   "The bear is growling"
 ]

-embeddings = Example.fetch_embeddings(model_info, tokenizer, input)
+embeddings = Example.embed(model_info, tokenizer, input)

 for {content, embedding} <- Enum.zip(input, embeddings) do
   Postgrex.query!(pid, "INSERT INTO documents (content, embedding) VALUES ($1, $2)", [
@@ -70,7 +70,7 @@ LIMIT 5
 """

 query = "growling bear"
-query_embedding = Example.fetch_embeddings(model_info, tokenizer, [query]) |> List.first()
+query_embedding = Example.embed(model_info, tokenizer, [query]) |> List.first()
 k = 60

 result = Postgrex.query!(pid, sql, [query, query_embedding, k])
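The SQL bound to $1 (the query text), $2 (the query embedding) and $3 (the RRF constant k) is not part of this diff; only its trailing LIMIT 5 appears as hunk context. A sketch of what such a reciprocal rank fusion query over full-text and vector ranks typically looks like; the example's actual statement may differ:

sql = """
WITH semantic_search AS (
  SELECT id, RANK() OVER (ORDER BY embedding <=> $2) AS rank
  FROM documents
  ORDER BY embedding <=> $2
  LIMIT 20
), keyword_search AS (
  SELECT id, RANK() OVER (ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC) AS rank
  FROM documents, plainto_tsquery('english', $1) query
  WHERE to_tsvector('english', content) @@ query
  ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC
  LIMIT 20
)
SELECT
  COALESCE(semantic_search.id, keyword_search.id) AS id,
  COALESCE(1.0 / ($3 + semantic_search.rank), 0.0) +
    COALESCE(1.0 / ($3 + keyword_search.rank), 0.0) AS score
FROM semantic_search
FULL OUTER JOIN keyword_search ON semantic_search.id = keyword_search.id
ORDER BY score DESC
LIMIT 5
"""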

examples/openai/example.exs (+6 -5)

@@ -12,7 +12,7 @@ Postgrex.query!(
 )

 defmodule Example do
-  def fetch_embeddings(input) do
+  def embed(input) do
     api_key = System.fetch_env!("OPENAI_API_KEY")
     url = "https://api.openai.com/v1/embeddings"

@@ -38,7 +38,7 @@ input = [
   "The bear is growling"
 ]

-embeddings = Example.fetch_embeddings(input)
+embeddings = Example.embed(input)

 for {content, embedding} <- Enum.zip(input, embeddings) do
   Postgrex.query!(pid, "INSERT INTO documents (content, embedding) VALUES ($1, $2)", [
@@ -47,13 +47,14 @@ for {content, embedding} <- Enum.zip(input, embeddings) do
   ])
 end

-document_id = 1
+query = "forest"
+query_embedding = Example.embed([query]) |> List.first()

 result =
   Postgrex.query!(
     pid,
-    "SELECT id, content FROM documents WHERE id != $1 ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = $1) LIMIT 5",
-    [document_id]
+    "SELECT id, content FROM documents ORDER BY embedding <=> $1 LIMIT 5",
+    [query_embedding]
   )

 for [id, content] <- result.rows do
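Besides the rename, this file switches the similarity query from "neighbors of a stored document id" to "neighbors of an embedded query string", ordered by pgvector's cosine distance operator <=>. The body of embed/1 is not shown in the hunk; a hypothetical sketch of how it could call the embeddings endpoint, assuming Req as the HTTP client and the text-embedding-3-small model (both assumptions, as is the module name):

defmodule OpenAISketch do
  def embed(input) do
    api_key = System.fetch_env!("OPENAI_API_KEY")
    url = "https://api.openai.com/v1/embeddings"

    response =
      Req.post!(url,
        auth: {:bearer, api_key},
        json: %{input: input, model: "text-embedding-3-small"}
      )

    # The "data" list holds one embedding per input, in the same order.
    Enum.map(response.body["data"], & &1["embedding"])
  end
end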

examples/sparse_search/example.exs (+3 -3)

@@ -20,7 +20,7 @@ model_id = "opensearch-project/opensearch-neural-sparse-encoding-v1"
 {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_id})

 defmodule Example do
-  def fetch_embeddings(model_info, tokenizer, input) do
+  def embed(model_info, tokenizer, input) do
     inputs = Bumblebee.apply_tokenizer(tokenizer, input)
     outputs = Axon.predict(model_info.model, model_info.params, inputs)

@@ -47,7 +47,7 @@ input = [
   "The bear is growling"
 ]

-embeddings = Example.fetch_embeddings(model_info, tokenizer, input)
+embeddings = Example.embed(model_info, tokenizer, input)

 for {content, embedding} <- Enum.zip(input, embeddings) do
   Postgrex.query!(pid, "INSERT INTO documents (content, embedding) VALUES ($1, $2)", [
@@ -59,7 +59,7 @@ end
 query = "forest"

 query_embedding =
-  Example.fetch_embeddings(model_info, tokenizer, [query])
+  Example.embed(model_info, tokenizer, [query])
   |> List.first()

 result =
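Only the first two lines of the sparse helper appear in the hunk. The opensearch-project neural sparse encoders are typically pooled by taking log(1 + relu(logits)), masking padding and max-pooling over the sequence; a hypothetical continuation of embed/3 along those lines (an assumption, not taken from the example's actual code):

    # Continues from the inputs/outputs bindings shown above;
    # the exact pooling in the example may differ.
    values =
      outputs.logits
      |> Nx.max(0)
      |> Nx.add(1)
      |> Nx.log()
      |> Nx.multiply(Nx.new_axis(inputs["attention_mask"], -1))
      |> Nx.reduce_max(axes: [1])

    # One vocabulary-sized sparse activation vector per input text.
    Nx.to_list(values)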
