diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 431ad44..74c962d 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -41,6 +41,7 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }} + PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }} run: bundle exec rspec - name: Upload coverage to Codecov diff --git a/README.md b/README.md index 4a077fc..c3862c1 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ RubyLLM.configure do |config| config.anthropic_api_key = ENV['ANTHROPIC_API_KEY'] config.gemini_api_key = ENV['GEMINI_API_KEY'] config.deepseek_api_key = ENV['DEEPSEEK_API_KEY'] # Optional + config.perplexity_api_key = ENV['PERPLEXITY_API_KEY'] end ``` diff --git a/bin/console b/bin/console index e43cf3d..68f5d37 100755 --- a/bin/console +++ b/bin/console @@ -12,6 +12,7 @@ RubyLLM.configure do |config| config.anthropic_api_key = ENV.fetch('ANTHROPIC_API_KEY', nil) config.gemini_api_key = ENV.fetch('GEMINI_API_KEY', nil) config.deepseek_api_key = ENV.fetch('DEEPSEEK_API_KEY', nil) + config.perplexity_api_key = ENV.fetch('PERPLEXITY_API_KEY', nil) end IRB.start(__FILE__) diff --git a/docs/guides/getting-started.md b/docs/guides/getting-started.md index ff6cb1c..8764dfa 100644 --- a/docs/guides/getting-started.md +++ b/docs/guides/getting-started.md @@ -30,6 +30,7 @@ RubyLLM.configure do |config| config.anthropic_api_key = ENV['ANTHROPIC_API_KEY'] config.gemini_api_key = ENV['GEMINI_API_KEY'] config.deepseek_api_key = ENV['DEEPSEEK_API_KEY'] + config.perplexity_api_key = ENV['PERPLEXITY_API_KEY'] end ``` diff --git a/docs/guides/rails.md b/docs/guides/rails.md index 4108b87..43a1bcb 100644 --- a/docs/guides/rails.md +++ b/docs/guides/rails.md @@ -96,6 +96,7 @@ RubyLLM.configure do |config| config.anthropic_api_key = ENV['ANTHROPIC_API_KEY'] config.gemini_api_key = ENV['GEMINI_API_KEY'] config.deepseek_api_key = ENV['DEEPSEEK_API_KEY'] + config.perplexity_api_key = ENV['PERPLEXITY_API_KEY'] end ``` diff --git a/docs/installation.md b/docs/installation.md index aac4f32..f7735d1 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -55,6 +55,7 @@ RubyLLM.configure do |config| config.anthropic_api_key = ENV['ANTHROPIC_API_KEY'] config.gemini_api_key = ENV['GEMINI_API_KEY'] config.deepseek_api_key = ENV['DEEPSEEK_API_KEY'] + config.perplexity_api_key = ENV['PERPLEXITY_API_KEY'] # Optional: Set default models config.default_model = 'gpt-4o-mini' # Default chat model diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 0563e13..0ec4d37 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -15,7 +15,8 @@ 'llm' => 'LLM', 'openai' => 'OpenAI', 'api' => 'API', - 'deepseek' => 'DeepSeek' + 'deepseek' => 'DeepSeek', + 'perplexity' => 'Perplexity' ) loader.setup @@ -68,6 +69,7 @@ def logger RubyLLM::Provider.register :anthropic, RubyLLM::Providers::Anthropic RubyLLM::Provider.register :gemini, RubyLLM::Providers::Gemini RubyLLM::Provider.register :deepseek, RubyLLM::Providers::DeepSeek +RubyLLM::Provider.register :perplexity, RubyLLM::Providers::Perplexity if defined?(Rails::Railtie) require 'ruby_llm/railtie' diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index 72a878a..699ad10 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -14,6 +14,7 @@ class Configuration :anthropic_api_key, :gemini_api_key, :deepseek_api_key, + :perplexity_api_key, :default_model, :default_embedding_model, 
:default_image_model, diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index 386e2ad..41821e9 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -18,9 +18,7 @@ "description": "Model trained to return answers to questions that are grounded in provided sources, along with estimating answerable probability.", "input_token_limit": 7168, "output_token_limit": 1024, - "supported_generation_methods": [ - "generateAnswer" - ] + "supported_generation_methods": ["generateAnswer"] } }, { @@ -61,10 +59,7 @@ "description": "A legacy text-only model optimized for chat conversations", "input_token_limit": 4096, "output_token_limit": 1024, - "supported_generation_methods": [ - "generateMessage", - "countMessageTokens" - ] + "supported_generation_methods": ["generateMessage", "countMessageTokens"] } }, { @@ -344,9 +339,7 @@ "description": "Obtain a distributed representation of a text.", "input_token_limit": 2048, "output_token_limit": 1, - "supported_generation_methods": [ - "embedContent" - ] + "supported_generation_methods": ["embedContent"] } }, { @@ -368,10 +361,7 @@ "description": "Obtain a distributed representation of a text.", "input_token_limit": 1024, "output_token_limit": 1, - "supported_generation_methods": [ - "embedText", - "countTextTokens" - ] + "supported_generation_methods": ["embedText", "countTextTokens"] } }, { @@ -393,10 +383,7 @@ "description": "The original Gemini 1.0 Pro Vision model version which was optimized for image understanding. Gemini 1.0 Pro Vision was deprecated on July 12, 2024. Move to a newer Gemini version.", "input_token_limit": 12288, "output_token_limit": 4096, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -418,10 +405,7 @@ "description": "Alias that points to the most recent stable version of Gemini 1.5 Flash, our fast and versatile multimodal model for scaling across diverse tasks.", "input_token_limit": 1000000, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -573,10 +557,7 @@ "description": "Experimental release (August 27th, 2024) of Gemini 1.5 Flash-8B, our smallest and most cost effective Flash model. Replaced by Gemini-1.5-flash-8b-001 (stable).", "input_token_limit": 1000000, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -598,10 +579,7 @@ "description": "Experimental release (September 24th, 2024) of Gemini 1.5 Flash-8B, our smallest and most cost effective Flash model. 
Replaced by Gemini-1.5-flash-8b-001 (stable).", "input_token_limit": 1000000, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -649,10 +627,7 @@ "description": "Alias that points to the most recent production (non-experimental) release of Gemini 1.5 Flash, our fast and versatile multimodal model for scaling across diverse tasks.", "input_token_limit": 1000000, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -674,10 +649,7 @@ "description": "Stable version of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens, released in May of 2024.", "input_token_limit": 2000000, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -751,10 +723,7 @@ "description": "Alias that points to the most recent production (non-experimental) release of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens.", "input_token_limit": 2000000, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -776,10 +745,7 @@ "description": "Gemini 2.0 Flash", "input_token_limit": 1048576, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -801,10 +767,7 @@ "description": "Stable version of Gemini 2.0 Flash, our fast and versatile multimodal model for scaling across diverse tasks, released in January of 2025.", "input_token_limit": 1048576, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -852,10 +815,7 @@ "description": "Gemini 2.0 Flash-Lite", "input_token_limit": 1048576, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -877,10 +837,7 @@ "description": "Stable version of Gemini 2.0 Flash Lite", "input_token_limit": 1048576, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -902,10 +859,7 @@ "description": "Preview release (February 5th, 2025) of Gemini 2.0 Flash Lite", "input_token_limit": 1048576, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -927,10 +881,7 @@ "description": "Preview release (February 5th, 2025) of Gemini 2.0 Flash Lite", "input_token_limit": 1048576, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -952,10 +903,7 @@ "description": "Experimental release (January 21st, 2025) of Gemini 2.0 Flash Thinking", "input_token_limit": 1048576, "output_token_limit": 65536, - "supported_generation_methods": [ - "generateContent", - "countTokens" - 
] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -977,10 +925,7 @@ "description": "Experimental release (January 21st, 2025) of Gemini 2.0 Flash Thinking", "input_token_limit": 1048576, "output_token_limit": 65536, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1002,10 +947,7 @@ "description": "Gemini 2.0 Flash Thinking Experimental", "input_token_limit": 1048576, "output_token_limit": 65536, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1027,10 +969,7 @@ "description": "Experimental release (February 5th, 2025) of Gemini 2.0 Pro", "input_token_limit": 2097152, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1052,10 +991,7 @@ "description": "Experimental release (February 5th, 2025) of Gemini 2.0 Pro", "input_token_limit": 2097152, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1077,10 +1013,7 @@ "description": "Experimental release (February 5th, 2025) of Gemini 2.0 Pro", "input_token_limit": 2097152, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1102,10 +1035,7 @@ "description": "The original Gemini 1.0 Pro Vision model version which was optimized for image understanding. Gemini 1.0 Pro Vision was deprecated on July 12, 2024. Move to a newer Gemini version.", "input_token_limit": 12288, "output_token_limit": 4096, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1716,9 +1646,7 @@ "description": "Vertex served Imagen 3.0 002 model", "input_token_limit": 480, "output_token_limit": 8192, - "supported_generation_methods": [ - "predict" - ] + "supported_generation_methods": ["predict"] } }, { @@ -1740,10 +1668,7 @@ "description": "Alias that points to the most recent stable version of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens.", "input_token_limit": 32767, "output_token_limit": 8192, - "supported_generation_methods": [ - "generateContent", - "countTokens" - ] + "supported_generation_methods": ["generateContent", "countTokens"] } }, { @@ -1860,6 +1785,115 @@ "owned_by": "system" } }, + { + "id": "r1-1776", + "created_at": null, + "display_name": "R1-1776", + "provider": "perplexity", + "context_window": 128000, + "max_tokens": 4096, + "type": "chat", + "family": "r1_1776", + "supports_vision": false, + "supports_functions": false, + "supports_json_mode": true, + "input_price_per_million": 2.0, + "output_price_per_million": 8.0, + "metadata": { + "description": "R1-1776 is a version of the DeepSeek R1 model that has been post-trained to provide uncensored, unbiased, and factual information." 
+ } + }, + { + "id": "sonar", + "created_at": null, + "display_name": "Sonar", + "provider": "perplexity", + "context_window": 128000, + "max_tokens": 4096, + "type": "chat", + "family": "sonar", + "supports_vision": true, + "supports_functions": false, + "supports_json_mode": true, + "input_price_per_million": 1.0, + "output_price_per_million": 1.0, + "metadata": { + "description": "Lightweight offering with search grounding, quicker and cheaper than Sonar Pro." + } + }, + { + "id": "sonar-deep-research", + "created_at": null, + "display_name": "Sonar Deep Research", + "provider": "perplexity", + "context_window": 128000, + "max_tokens": 4096, + "type": "chat", + "family": "sonar_deep_research", + "supports_vision": false, + "supports_functions": false, + "supports_json_mode": true, + "input_price_per_million": 2.0, + "output_price_per_million": 8.0, + "metadata": { + "description": "Deep Research conducts comprehensive, expert-level research and synthesizes it into accessible, actionable reports.", + "reasoning_price_per_million": 3.0 + } + }, + { + "id": "sonar-pro", + "created_at": null, + "display_name": "Sonar Pro", + "provider": "perplexity", + "context_window": 200000, + "max_tokens": 8192, + "type": "chat", + "family": "sonar_pro", + "supports_vision": true, + "supports_functions": false, + "supports_json_mode": true, + "input_price_per_million": 3.0, + "output_price_per_million": 15.0, + "metadata": { + "description": "Premier search offering with search grounding, supporting advanced queries and follow-ups." + } + }, + { + "id": "sonar-reasoning", + "created_at": null, + "display_name": "Sonar Reasoning", + "provider": "perplexity", + "context_window": 128000, + "max_tokens": 4096, + "type": "chat", + "family": "sonar_reasoning", + "supports_vision": true, + "supports_functions": false, + "supports_json_mode": true, + "input_price_per_million": 1.0, + "output_price_per_million": 5.0, + "metadata": { + "description": "Reasoning model with Chain of Thought (CoT) capabilities." + } + }, + { + "id": "sonar-reasoning-pro", + "created_at": null, + "display_name": "Sonar Reasoning Pro", + "provider": "perplexity", + "context_window": 128000, + "max_tokens": 8192, + "type": "chat", + "family": "sonar_reasoning_pro", + "supports_vision": true, + "supports_functions": false, + "supports_json_mode": true, + "input_price_per_million": 2.0, + "output_price_per_million": 8.0, + "metadata": { + "description": "Premier reasoning offering powered by DeepSeek R1 with Chain of Thought (CoT)." + } + }, { "id": "text-bison-001", "created_at": null, @@ -1905,9 +1939,7 @@ "description": "Obtain a distributed representation of a text.", "input_token_limit": 2048, "output_token_limit": 1, - "supported_generation_methods": [ - "embedContent" - ] + "supported_generation_methods": ["embedContent"] } }, { @@ -2062,4 +2094,4 @@ "owned_by": "openai-internal" } } -] \ No newline at end of file +] diff --git a/lib/ruby_llm/providers/perplexity.rb b/lib/ruby_llm/providers/perplexity.rb new file mode 100644 index 0000000..a365af0 --- /dev/null +++ b/lib/ruby_llm/providers/perplexity.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + # Perplexity API integration. Handles chat completion, streaming, + # and Perplexity's unique features like citations. 
+ module Perplexity + extend Provider + extend Perplexity::Chat + extend Perplexity::Models + extend Perplexity::Streaming + + def self.extended(base) + base.extend(Provider) + base.extend(Perplexity::Chat) + base.extend(Perplexity::Models) + base.extend(Perplexity::Streaming) + end + + module_function + + def api_base + 'https://api.perplexity.ai' + end + + def headers + { + 'Authorization' => "Bearer #{RubyLLM.config.perplexity_api_key}", + 'Content-Type' => 'application/json' + } + end + + def capabilities + Perplexity::Capabilities + end + + def slug + 'perplexity' + end + end + end +end diff --git a/lib/ruby_llm/providers/perplexity/capabilities.rb b/lib/ruby_llm/providers/perplexity/capabilities.rb new file mode 100644 index 0000000..293c297 --- /dev/null +++ b/lib/ruby_llm/providers/perplexity/capabilities.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module Perplexity + # Determines capabilities and pricing for Perplexity models + module Capabilities + module_function + + # Returns the context window size for the given model ID + # @param model_id [String] the model identifier + # @return [Integer] the context window size in tokens + def context_window_for(model_id) + case model_id + when /sonar-pro/ then 200_000 # the specific pattern must precede the /sonar/ fallback + else 128_000 # sonar, sonar-reasoning(-pro), sonar-deep-research and r1-1776 are all 128k + end + end + + # Returns the maximum number of tokens that can be generated + # @param model_id [String] the model identifier + # @return [Integer] the maximum number of tokens + def max_tokens_for(model_id) + case model_id + when /sonar-(?:pro|reasoning-pro)/ then 8_192 + else 4_096 # Default if max_tokens not specified + end + end + + # Returns the price per million tokens for input (cache miss) + # @param model_id [String] the model identifier + # @return [Float] the price per million tokens in USD + def input_price_for(model_id) + PRICES.dig(model_family(model_id), :input) + end + + # Returns the price per million tokens for output + # @param model_id [String] the model identifier + # @return [Float] the price per million tokens in USD + def output_price_for(model_id) + PRICES.dig(model_family(model_id), :output) + end + + # Returns the price per million tokens for reasoning + # @param model_id [String] the model identifier + # @return [Float] the price per million tokens in USD + def reasoning_price_for(model_id) + PRICES.dig(model_family(model_id), :reasoning) || 0.0 + end + + # Returns the price per 1000 searches for the given model + # @param model_id [String] the model identifier + # @return [Float] the price per 1000 searches + def price_per_1000_searches_for(model_id) + PRICES.dig(model_family(model_id), :price_per_1000_searches) || 0.0 + end + + # Determines if the model supports vision capabilities + # @param model_id [String] the model identifier + # @return [Boolean] true if the model supports vision + def supports_vision?(model_id) + # Mirrors models.json: Sonar models accept images except Deep Research; + # r1-1776 is text-only. The specific pattern must precede /sonar/. + case model_id + when /sonar-deep-research/ then false + when /sonar/ then true + else false + end + end + + # Determines if the model supports function calling + # @param model_id [String] the model identifier + # @return [Boolean] true if the model supports functions + def supports_functions?(_model_id) + # Perplexity doesn't support function calling + false + end + + # Determines if the model supports JSON mode + def supports_json_mode?(_model_id) + # Based on the
structured outputs beta feature + true + end + + # Formats the model ID into a human-readable display name + # @param model_id [String] the model identifier + # @return [String] the formatted display name + def format_display_name(model_id) + case model_id + when 'sonar-deep-research' then 'Sonar Deep Research' + when 'sonar-reasoning-pro' then 'Sonar Reasoning Pro' + when 'sonar-reasoning' then 'Sonar Reasoning' + when 'sonar-pro' then 'Sonar Pro' + when 'sonar' then 'Sonar' + when 'r1-1776' then 'R1-1776' + else + model_id.split('-') + .map(&:capitalize) + .join(' ') + end + end + + # Returns the model type + # @param model_id [String] the model identifier + # @return [String] the model type (e.g., 'chat') + def model_type(_model_id) + 'chat' # all Perplexity models are chat models + end + + # Returns the model family + # @param model_id [String] the model identifier + # @return [Symbol] the model family + def model_family(model_id) + case model_id + when 'sonar-deep-research' then :sonar_deep_research + when 'sonar-reasoning-pro' then :sonar_reasoning_pro + when 'sonar-reasoning' then :sonar_reasoning + when 'sonar-pro' then :sonar_pro + when 'sonar' then :sonar + when 'r1-1776' then :r1_1776 + else :r1_1776 # fall back to the r1_1776 family for unrecognized models + end + end + + # Pricing information for Perplexity models (USD per 1M tokens) + # Source: https://docs.perplexity.ai/guides/pricing; keep in sync with models.json + PRICES = { + sonar_deep_research: { + input: 2.00, # $2.00 per million tokens + output: 8.00, # $8.00 per million tokens + reasoning: 3.00, # $3.00 per million tokens + price_per_1000_searches: 5.00 # $5.00 per 1,000 searches + }, + sonar_reasoning_pro: { + input: 2.00, # $2.00 per million tokens + output: 8.00, # $8.00 per million tokens + price_per_1000_searches: 5.00 # $5.00 per 1,000 searches + }, + sonar_reasoning: { + input: 1.00, # $1.00 per million tokens + output: 5.00, # $5.00 per million tokens + price_per_1000_searches: 5.00 # $5.00 per 1,000 searches + }, + sonar_pro: { + input: 3.00, # $3.00 per million tokens + output: 15.00, # $15.00 per million tokens + price_per_1000_searches: 5.00 # $5.00 per 1,000 searches + }, + sonar: { + input: 1.00, # $1.00 per million tokens + output: 1.00, # $1.00 per million tokens + price_per_1000_searches: 5.00 # $5.00 per 1,000 searches + }, + r1_1776: { + input: 2.00, # $2.00 per million tokens + output: 8.00 # $8.00 per million tokens + } + }.freeze + end + end + end +end diff --git a/lib/ruby_llm/providers/perplexity/chat.rb b/lib/ruby_llm/providers/perplexity/chat.rb new file mode 100644 index 0000000..f503dfc --- /dev/null +++ b/lib/ruby_llm/providers/perplexity/chat.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module Perplexity + # Chat methods of the Perplexity API integration + module Chat + module_function + + def completion_url + 'chat/completions' + end + + def render_payload(messages, tools:, temperature:, model:, stream: false) + # tools: is accepted for interface parity but ignored; Perplexity has no function calling + { + model: model, + messages: format_messages(messages), + temperature: temperature, + stream: stream + } + end + + def parse_completion_response(response) + data = response.body + return if data.empty?
+ + message_data = data.dig('choices', 0, 'message') + return unless message_data + + # Create a message with citations if available + content = message_data['content'] + + Message.new( + role: :assistant, + content: content, + input_tokens: data['usage']['prompt_tokens'], + output_tokens: data['usage']['completion_tokens'], + model_id: data['model'], + metadata: { + citations: data['citations'] + } + ) + end + + def format_messages(messages) + messages.map do |msg| + { + role: msg.role.to_s, + content: msg.content + } + end + end + end + end + end +end diff --git a/lib/ruby_llm/providers/perplexity/models.rb b/lib/ruby_llm/providers/perplexity/models.rb new file mode 100644 index 0000000..abb2ba5 --- /dev/null +++ b/lib/ruby_llm/providers/perplexity/models.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module Perplexity + # Models methods of the Perplexity API integration + module Models + module_function + + def models_url + # Perplexity doesn't have a models endpoint, so we'll return a static list + nil + end + + def parse_list_models_response(_response, slug, capabilities) + # Since Perplexity doesn't have a models endpoint, we'll return a static list + [ + create_model_info('sonar', slug, capabilities), + create_model_info('sonar-pro', slug, capabilities), + create_model_info('sonar-reasoning', slug, capabilities), + create_model_info('sonar-reasoning-pro', slug, capabilities), + create_model_info('sonar-deep-research', slug, capabilities), + create_model_info('r1-1776', slug, capabilities) + ] + end + + def create_model_info(id, slug, capabilities) + ModelInfo.new( + id: id, + created_at: Time.now, + display_name: capabilities.format_display_name(id), + provider: slug, + type: capabilities.model_type(id), + family: capabilities.model_family(id).to_s, + context_window: capabilities.context_window_for(id), + max_tokens: capabilities.max_tokens_for(id), + supports_vision: capabilities.supports_vision?(id), + supports_functions: capabilities.supports_functions?(id), + supports_json_mode: capabilities.supports_json_mode?(id), + input_price_per_million: capabilities.input_price_for(id), + output_price_per_million: capabilities.output_price_for(id), + metadata: { + reasoning_price_per_million: capabilities.reasoning_price_for(id) + } + ) + end + end + end + end +end diff --git a/lib/ruby_llm/providers/perplexity/streaming.rb b/lib/ruby_llm/providers/perplexity/streaming.rb new file mode 100644 index 0000000..3975db5 --- /dev/null +++ b/lib/ruby_llm/providers/perplexity/streaming.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module Perplexity + # Streaming methods of the Perplexity API integration + module Streaming + module_function + + def stream_url + completion_url + end + + def handle_stream(&block) + to_json_stream do |data| + block.call( + Chunk.new( + role: :assistant, + model_id: data['model'], + content: data.dig('choices', 0, 'delta', 'content'), + input_tokens: data.dig('usage', 'prompt_tokens'), + output_tokens: data.dig('usage', 'completion_tokens') + ) + ) + end + end + end + end + end +end diff --git a/lib/tasks/models.rake b/lib/tasks/models.rake index 73a34ee..304bd5a 100644 --- a/lib/tasks/models.rake +++ b/lib/tasks/models.rake @@ -19,6 +19,10 @@ PROVIDER_DOCS = { }, anthropic: { models: 'https://docs.anthropic.com/en/docs/about-claude/models/all-models' + }, + perplexity: { + models: 'https://docs.perplexity.ai/guides/model-cards', + pricing: 'https://docs.perplexity.ai/guides/pricing' 
} }.freeze @@ -72,6 +76,7 @@ namespace :models do # rubocop:disable Metrics/BlockLength config.anthropic_api_key = ENV.fetch('ANTHROPIC_API_KEY') config.gemini_api_key = ENV.fetch('GEMINI_API_KEY') config.deepseek_api_key = ENV.fetch('DEEPSEEK_API_KEY') + config.perplexity_api_key = ENV.fetch('PERPLEXITY_API_KEY', nil) # optional: Perplexity's model list is static end # Refresh models (now returns self instead of models array) diff --git a/spec/ruby_llm/chat_spec.rb b/spec/ruby_llm/chat_spec.rb index 7ced4f9..c656905 100644 --- a/spec/ruby_llm/chat_spec.rb +++ b/spec/ruby_llm/chat_spec.rb @@ -11,7 +11,8 @@ 'claude-3-5-haiku-20241022', 'gemini-2.0-flash', 'deepseek-chat', - 'gpt-4o-mini' + 'gpt-4o-mini', + 'sonar' ].each do |model| it "#{model} can have a basic conversation" do # rubocop:disable RSpec/ExampleLength,RSpec/MultipleExpectations chat = RubyLLM.chat(model: model) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index d35bfae..57be8d0 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -40,6 +40,7 @@ config.anthropic_api_key = ENV.fetch('ANTHROPIC_API_KEY') config.gemini_api_key = ENV.fetch('GEMINI_API_KEY') config.deepseek_api_key = ENV.fetch('DEEPSEEK_API_KEY') + config.perplexity_api_key = ENV.fetch('PERPLEXITY_API_KEY') config.max_retries = 50 end end
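
A minimal usage sketch of what this diff enables, for trying the provider locally. It assumes the standard RubyLLM flow where `RubyLLM.chat(...).ask` returns the parsed `Message`, and it assumes `Message` exposes the `metadata` hash that `parse_completion_response` populates; treat the citation access as illustrative rather than a settled API.

```ruby
require 'ruby_llm'

# Configure the new provider key (nil-safe, matching bin/console).
RubyLLM.configure do |config|
  config.perplexity_api_key = ENV.fetch('PERPLEXITY_API_KEY', nil)
end

# 'sonar' is the lightweight search-grounded model from the static list
# in Perplexity::Models (there is no live models endpoint to query).
chat = RubyLLM.chat(model: 'sonar')

response = chat.ask 'What is the latest stable Ruby release?'
puts response.content

# parse_completion_response tucks Perplexity's citation URLs into the
# message metadata; `response.metadata` is an assumed reader here.
Array(response.metadata[:citations]).each { |url| puts "- #{url}" }

# Streaming goes through Perplexity::Streaming, yielding Chunk objects.
chat.ask 'Summarize this week in AI research' do |chunk|
  print chunk.content
end
```

Because `models_url` returns nil and the model list is hard-coded, `rake models:update` works without a Perplexity key; only live chat calls need one.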