From acfd77128a5d2a88fd4056cb59668521756a852b Mon Sep 17 00:00:00 2001 From: Bruno Perez Date: Tue, 2 Jun 2026 18:14:33 +0200 Subject: [PATCH] feat: add Cohere, Perplexity, and Meta providers Add Cohere (8 Command models), Perplexity (4 Sonar models), and Meta Llama (4 models) to the catalog. Also expand Alibaba Qwen from 1 to 8 models, add the missing OpenAI reasoning models (gpt-5.3-codex, gpt-5.4-pro, gpt-5.4-nano, gpt-5.5-pro, o3-pro, plus pro subscription variants), add MiniMax M3, and add the prompt_mode reasoning control to the Magistral models. Every parameter is verified against the provider's official API reference. --- models/alibaba/qwen-flash.yaml | 44 +++++++ models/alibaba/qwen-plus.yaml | 44 +++++++ models/alibaba/qwen3-coder-flash.yaml | 38 ++++++ models/alibaba/qwen3-coder-plus.yaml | 38 ++++++ models/alibaba/qwen3-max.yaml | 44 +++++++ models/alibaba/qwen3.5-flash.yaml | 44 +++++++ models/alibaba/qwq-plus.yaml | 38 ++++++ models/cohere/command-a-03-2025.yaml | 102 +++++++++++++++ models/cohere/command-a-plus-05-2026.yaml | 102 +++++++++++++++ .../cohere/command-a-reasoning-08-2025.yaml | 121 ++++++++++++++++++ .../cohere/command-a-translate-08-2025.yaml | 102 +++++++++++++++ models/cohere/command-a-vision-07-2025.yaml | 102 +++++++++++++++ models/cohere/command-r-08-2024.yaml | 95 ++++++++++++++ models/cohere/command-r-plus-08-2024.yaml | 95 ++++++++++++++ models/cohere/command-r7b-12-2024.yaml | 102 +++++++++++++++ models/meta/Llama-3.3-70B-Instruct.yaml | 50 ++++++++ models/meta/Llama-3.3-8B-Instruct.yaml | 50 ++++++++ ...lama-4-Maverick-17B-128E-Instruct-FP8.yaml | 50 ++++++++ .../Llama-4-Scout-17B-16E-Instruct-FP8.yaml | 50 ++++++++ models/minimax/MiniMax-M3-subscription.yaml | 34 +++++ models/minimax/minimax-m3.yaml | 40 ++++++ models/mistral/magistral-medium-latest.yaml | 7 + models/mistral/magistral-small-latest.yaml | 7 + models/openai/gpt-5.3-codex.yaml | 24 ++++ models/openai/gpt-5.4-nano.yaml | 25 ++++ 
models/openai/gpt-5.4-pro-subscription.yaml | 36 ++++++ models/openai/gpt-5.4-pro.yaml | 23 ++++ models/openai/gpt-5.5-pro-subscription.yaml | 36 ++++++ models/openai/gpt-5.5-pro.yaml | 23 ++++ models/openai/o3-pro.yaml | 24 ++++ models/perplexity/sonar-deep-research.yaml | 98 ++++++++++++++ models/perplexity/sonar-pro.yaml | 94 ++++++++++++++ models/perplexity/sonar-reasoning-pro.yaml | 94 ++++++++++++++ models/perplexity/sonar.yaml | 94 ++++++++++++++ src/client/logos/meta.svg | 3 + 35 files changed, 1973 insertions(+) create mode 100644 models/alibaba/qwen-flash.yaml create mode 100644 models/alibaba/qwen-plus.yaml create mode 100644 models/alibaba/qwen3-coder-flash.yaml create mode 100644 models/alibaba/qwen3-coder-plus.yaml create mode 100644 models/alibaba/qwen3-max.yaml create mode 100644 models/alibaba/qwen3.5-flash.yaml create mode 100644 models/alibaba/qwq-plus.yaml create mode 100644 models/cohere/command-a-03-2025.yaml create mode 100644 models/cohere/command-a-plus-05-2026.yaml create mode 100644 models/cohere/command-a-reasoning-08-2025.yaml create mode 100644 models/cohere/command-a-translate-08-2025.yaml create mode 100644 models/cohere/command-a-vision-07-2025.yaml create mode 100644 models/cohere/command-r-08-2024.yaml create mode 100644 models/cohere/command-r-plus-08-2024.yaml create mode 100644 models/cohere/command-r7b-12-2024.yaml create mode 100644 models/meta/Llama-3.3-70B-Instruct.yaml create mode 100644 models/meta/Llama-3.3-8B-Instruct.yaml create mode 100644 models/meta/Llama-4-Maverick-17B-128E-Instruct-FP8.yaml create mode 100644 models/meta/Llama-4-Scout-17B-16E-Instruct-FP8.yaml create mode 100644 models/minimax/MiniMax-M3-subscription.yaml create mode 100644 models/minimax/minimax-m3.yaml create mode 100644 models/openai/gpt-5.3-codex.yaml create mode 100644 models/openai/gpt-5.4-nano.yaml create mode 100644 models/openai/gpt-5.4-pro-subscription.yaml create mode 100644 models/openai/gpt-5.4-pro.yaml create mode 100644 
models/openai/gpt-5.5-pro-subscription.yaml create mode 100644 models/openai/gpt-5.5-pro.yaml create mode 100644 models/openai/o3-pro.yaml create mode 100644 models/perplexity/sonar-deep-research.yaml create mode 100644 models/perplexity/sonar-pro.yaml create mode 100644 models/perplexity/sonar-reasoning-pro.yaml create mode 100644 models/perplexity/sonar.yaml create mode 100644 src/client/logos/meta.svg diff --git a/models/alibaba/qwen-flash.yaml b/models/alibaba/qwen-flash.yaml new file mode 100644 index 0000000..5d9629f --- /dev/null +++ b/models/alibaba/qwen-flash.yaml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwen-flash +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + default: 20 + range: + min: 1 + group: sampling + - path: extra_body.chat_template_kwargs.enable_thinking + type: boolean + label: Enable thinking + description: Controls Qwen3 thinking mode when using OpenAI-compatible clients that pass provider-specific extra body fields. 
+ default: true + group: reasoning diff --git a/models/alibaba/qwen-plus.yaml b/models/alibaba/qwen-plus.yaml new file mode 100644 index 0000000..5813fbe --- /dev/null +++ b/models/alibaba/qwen-plus.yaml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwen-plus +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + default: 20 + range: + min: 1 + group: sampling + - path: extra_body.chat_template_kwargs.enable_thinking + type: boolean + label: Enable thinking + description: Controls Qwen3 thinking mode when using OpenAI-compatible clients that pass provider-specific extra body fields. + default: true + group: reasoning diff --git a/models/alibaba/qwen3-coder-flash.yaml b/models/alibaba/qwen3-coder-flash.yaml new file mode 100644 index 0000000..4d9330f --- /dev/null +++ b/models/alibaba/qwen3-coder-flash.yaml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwen3-coder-flash +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. 
+ range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + default: 20 + range: + min: 1 + group: sampling diff --git a/models/alibaba/qwen3-coder-plus.yaml b/models/alibaba/qwen3-coder-plus.yaml new file mode 100644 index 0000000..8239a2f --- /dev/null +++ b/models/alibaba/qwen3-coder-plus.yaml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwen3-coder-plus +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. 
+ default: 20 + range: + min: 1 + group: sampling diff --git a/models/alibaba/qwen3-max.yaml b/models/alibaba/qwen3-max.yaml new file mode 100644 index 0000000..270e00a --- /dev/null +++ b/models/alibaba/qwen3-max.yaml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwen3-max +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + default: 20 + range: + min: 1 + group: sampling + - path: extra_body.chat_template_kwargs.enable_thinking + type: boolean + label: Enable thinking + description: Controls Qwen3 thinking mode when using OpenAI-compatible clients that pass provider-specific extra body fields. + default: false + group: reasoning diff --git a/models/alibaba/qwen3.5-flash.yaml b/models/alibaba/qwen3.5-flash.yaml new file mode 100644 index 0000000..6b41cb3 --- /dev/null +++ b/models/alibaba/qwen3.5-flash.yaml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwen3.5-flash +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. 
+ range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + default: 20 + range: + min: 1 + group: sampling + - path: extra_body.chat_template_kwargs.enable_thinking + type: boolean + label: Enable thinking + description: Controls Qwen3 thinking mode when using OpenAI-compatible clients that pass provider-specific extra body fields. + default: true + group: reasoning diff --git a/models/alibaba/qwq-plus.yaml b/models/alibaba/qwq-plus.yaml new file mode 100644 index 0000000..8b68323 --- /dev/null +++ b/models/alibaba/qwq-plus.yaml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: alibaba +authType: api_key +model: qwq-plus +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
+ range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: extra_body.top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + default: 20 + range: + min: 1 + group: sampling diff --git a/models/cohere/command-a-03-2025.yaml b/models/cohere/command-a-03-2025.yaml new file mode 100644 index 0000000..3b80d13 --- /dev/null +++ b/models/cohere/command-a-03-2025.yaml @@ -0,0 +1,102 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-a-03-2025 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. 
+ default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: tool_choice + type: enum + label: Tool choice + description: Forces the model to either call a tool or skip tool calls for this request. + values: + - REQUIRED + - NONE + group: tooling + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. + default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + group: provider_metadata diff --git a/models/cohere/command-a-plus-05-2026.yaml b/models/cohere/command-a-plus-05-2026.yaml new file mode 100644 index 0000000..45c1517 --- /dev/null +++ b/models/cohere/command-a-plus-05-2026.yaml @@ -0,0 +1,102 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-a-plus-05-2026 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. 
+ range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. 
+ default: false + group: observability + - path: tool_choice + type: enum + label: Tool choice + description: Forces the model to either call a tool or skip tool calls for this request. + values: + - REQUIRED + - NONE + group: tooling + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. + default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + group: provider_metadata diff --git a/models/cohere/command-a-reasoning-08-2025.yaml b/models/cohere/command-a-reasoning-08-2025.yaml new file mode 100644 index 0000000..a4ae0df --- /dev/null +++ b/models/cohere/command-a-reasoning-08-2025.yaml @@ -0,0 +1,121 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-a-reasoning-08-2025 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. 
+ default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: thinking.type + type: enum + label: Thinking mode + description: Controls whether the model reasons step by step before producing its final answer. + default: enabled + values: + - enabled + - disabled + group: reasoning + - path: thinking.token_budget + type: integer + label: Thinking token budget + description: Maximum number of tokens the model may spend on reasoning before answering. + range: + min: 1 + group: reasoning + applicability: + only: + thinking.type: enabled + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: tool_choice + type: enum + label: Tool choice + description: Forces the model to either call a tool or skip tool calls for this request. + values: + - REQUIRED + - NONE + group: tooling + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation.
+ default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + group: provider_metadata diff --git a/models/cohere/command-a-translate-08-2025.yaml b/models/cohere/command-a-translate-08-2025.yaml new file mode 100644 index 0000000..dd66fd8 --- /dev/null +++ b/models/cohere/command-a-translate-08-2025.yaml @@ -0,0 +1,102 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-a-translate-08-2025 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. 
+ default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: tool_choice + type: enum + label: Tool choice + description: Forces the model to either call a tool or skip tool calls for this request. + values: + - REQUIRED + - NONE + group: tooling + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. + default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + group: provider_metadata diff --git a/models/cohere/command-a-vision-07-2025.yaml b/models/cohere/command-a-vision-07-2025.yaml new file mode 100644 index 0000000..e9ece3c --- /dev/null +++ b/models/cohere/command-a-vision-07-2025.yaml @@ -0,0 +1,102 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-a-vision-07-2025 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. 
Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: tool_choice + type: enum + label: Tool choice + description: Forces the model to either call a tool or skip tool calls for this request. + values: + - REQUIRED + - NONE + group: tooling + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. 
+ default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + group: provider_metadata diff --git a/models/cohere/command-r-08-2024.yaml b/models/cohere/command-r-08-2024.yaml new file mode 100644 index 0000000..afe92d8 --- /dev/null +++ b/models/cohere/command-r-08-2024.yaml @@ -0,0 +1,95 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-r-08-2024 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. 
+ default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. + default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + - 'OFF' + group: provider_metadata diff --git a/models/cohere/command-r-plus-08-2024.yaml b/models/cohere/command-r-plus-08-2024.yaml new file mode 100644 index 0000000..2d00546 --- /dev/null +++ b/models/cohere/command-r-plus-08-2024.yaml @@ -0,0 +1,95 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-r-plus-08-2024 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied.
+ default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. 
+ default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + - OFF + group: provider_metadata diff --git a/models/cohere/command-r7b-12-2024.yaml b/models/cohere/command-r7b-12-2024.yaml new file mode 100644 index 0000000..2361400 --- /dev/null +++ b/models/cohere/command-r7b-12-2024.yaml @@ -0,0 +1,102 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: cohere +authType: api_key +model: command-r7b-12-2024 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: stop_sequences + type: string + label: Stop sequences + description: Stops generation when one of these sequences is detected; up to five are allowed. + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0.3 + range: + min: 0 + step: 0.1 + group: sampling + - path: p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.75 + range: + min: 0.01 + max: 0.99 + step: 0.01 + group: sampling + - path: k + type: integer + label: Top K + description: Limits sampling to the K most likely tokens; 0 disables top-k sampling. + default: 0 + range: + min: 0 + max: 500 + group: sampling + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes tokens proportional to how often they have already appeared to reduce repetition. + default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Penalizes tokens that have already appeared to encourage a wider variety of content. 
+ default: 0 + range: + min: 0 + max: 1 + step: 0.1 + group: sampling + - path: seed + type: integer + label: Seed + description: Seed used for best-effort deterministic sampling when reproducible outputs are desired. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or JSON object output. + default: text + values: + - text + - json_object + group: output_format + - path: logprobs + type: boolean + label: Log probabilities + description: Controls whether the response includes log probabilities for the generated tokens. + default: false + group: observability + - path: tool_choice + type: enum + label: Tool choice + description: Forces the model to either call a tool or skip tool calls for this request. + values: + - REQUIRED + - NONE + group: tooling + - path: safety_mode + type: enum + label: Safety mode + description: Controls Cohere's built-in safety instructions applied to the generation. + default: CONTEXTUAL + values: + - CONTEXTUAL + - STRICT + group: provider_metadata diff --git a/models/meta/Llama-3.3-70B-Instruct.yaml b/models/meta/Llama-3.3-70B-Instruct.yaml new file mode 100644 index 0000000..4471717 --- /dev/null +++ b/models/meta/Llama-3.3-70B-Instruct.yaml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: meta +authType: api_key +model: Llama-3.3-70B-Instruct +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
+ group: sampling + - path: top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + group: sampling + - path: repetition_penalty + type: number + label: Repetition penalty + description: Penalizes tokens that have already appeared to reduce repetition in the output. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or a schema-constrained JSON object. + default: text + values: + - text + - json_schema + group: output_format + - path: tool_choice + type: enum + label: Tool choice + description: Controls whether the model may call tools, must call one, or skips tool calls. + values: + - auto + - none + - required + group: tooling diff --git a/models/meta/Llama-3.3-8B-Instruct.yaml b/models/meta/Llama-3.3-8B-Instruct.yaml new file mode 100644 index 0000000..b9b144d --- /dev/null +++ b/models/meta/Llama-3.3-8B-Instruct.yaml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: meta +authType: api_key +model: Llama-3.3-8B-Instruct +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + group: sampling + - path: top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. 
+ group: sampling + - path: repetition_penalty + type: number + label: Repetition penalty + description: Penalizes tokens that have already appeared to reduce repetition in the output. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or a schema-constrained JSON object. + default: text + values: + - text + - json_schema + group: output_format + - path: tool_choice + type: enum + label: Tool choice + description: Controls whether the model may call tools, must call one, or skips tool calls. + values: + - auto + - none + - required + group: tooling diff --git a/models/meta/Llama-4-Maverick-17B-128E-Instruct-FP8.yaml b/models/meta/Llama-4-Maverick-17B-128E-Instruct-FP8.yaml new file mode 100644 index 0000000..88403c7 --- /dev/null +++ b/models/meta/Llama-4-Maverick-17B-128E-Instruct-FP8.yaml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: meta +authType: api_key +model: Llama-4-Maverick-17B-128E-Instruct-FP8 +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + group: sampling + - path: top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + group: sampling + - path: repetition_penalty + type: number + label: Repetition penalty + description: Penalizes tokens that have already appeared to reduce repetition in the output. 
+ group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or a schema-constrained JSON object. + default: text + values: + - text + - json_schema + group: output_format + - path: tool_choice + type: enum + label: Tool choice + description: Controls whether the model may call tools, must call one, or skips tool calls. + values: + - auto + - none + - required + group: tooling diff --git a/models/meta/Llama-4-Scout-17B-16E-Instruct-FP8.yaml b/models/meta/Llama-4-Scout-17B-16E-Instruct-FP8.yaml new file mode 100644 index 0000000..770543a --- /dev/null +++ b/models/meta/Llama-4-Scout-17B-16E-Instruct-FP8.yaml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: meta +authType: api_key +model: Llama-4-Scout-17B-16E-Instruct-FP8 +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + group: sampling + - path: top_k + type: integer + label: Top K + description: Limits generation to the selected number of highest-probability tokens. + group: sampling + - path: repetition_penalty + type: number + label: Repetition penalty + description: Penalizes tokens that have already appeared to reduce repetition in the output. + group: sampling + - path: response_format.type + type: enum + label: Response format + description: Controls whether the model returns normal text or a schema-constrained JSON object. 
+ default: text + values: + - text + - json_schema + group: output_format + - path: tool_choice + type: enum + label: Tool choice + description: Controls whether the model may call tools, must call one, or skips tool calls. + values: + - auto + - none + - required + group: tooling diff --git a/models/minimax/MiniMax-M3-subscription.yaml b/models/minimax/MiniMax-M3-subscription.yaml new file mode 100644 index 0000000..ce72d1e --- /dev/null +++ b/models/minimax/MiniMax-M3-subscription.yaml @@ -0,0 +1,34 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: minimax +authType: subscription +model: MiniMax-M3 +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate in the response. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: >- + Controls randomness. Lower values make outputs more focused; higher values make them more + varied. Values must be greater than 0 and at most 1. + default: 1 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling diff --git a/models/minimax/minimax-m3.yaml b/models/minimax/minimax-m3.yaml new file mode 100644 index 0000000..5f7fa67 --- /dev/null +++ b/models/minimax/minimax-m3.yaml @@ -0,0 +1,40 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: minimax +authType: api_key +model: minimax-m3 +params: + - path: max_completion_tokens + type: integer + label: Max completion tokens + description: Maximum number of tokens to generate in the completion. + range: + min: 1 + group: generation_length + - path: temperature + type: number + label: Temperature + description: >- + Controls randomness. 
Lower values make outputs more focused; higher values make them more + varied. Values must be greater than 0 and at most 1. + default: 1 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + default: 0.95 + range: + min: 0.01 + max: 1 + step: 0.01 + group: sampling + - path: reasoning_split + type: boolean + label: Split reasoning + description: Returns the model's reasoning in a separate reasoning_details field instead of inline with the response. + default: false + group: reasoning diff --git a/models/mistral/magistral-medium-latest.yaml b/models/mistral/magistral-medium-latest.yaml index cdc2816..389104d 100644 --- a/models/mistral/magistral-medium-latest.yaml +++ b/models/mistral/magistral-medium-latest.yaml @@ -61,6 +61,13 @@ params: max: 2 step: 0.1 group: sampling + - path: prompt_mode + type: enum + label: Prompt mode + description: Applies Mistral's built-in reasoning system prompt, which is already the default when this field is omitted; the prompt is dropped only when the request explicitly sends null. + values: + - reasoning + group: reasoning - path: response_format.type type: enum label: Response format diff --git a/models/mistral/magistral-small-latest.yaml b/models/mistral/magistral-small-latest.yaml index c3c76c9..1b6bcbe 100644 --- a/models/mistral/magistral-small-latest.yaml +++ b/models/mistral/magistral-small-latest.yaml @@ -61,6 +61,13 @@ params: max: 2 step: 0.1 group: sampling + - path: prompt_mode + type: enum + label: Prompt mode + description: Applies Mistral's built-in reasoning system prompt, which is already the default when this field is omitted; the prompt is dropped only when the request explicitly sends null.
+ values: + - reasoning + group: reasoning - path: response_format.type type: enum label: Response format diff --git a/models/openai/gpt-5.3-codex.yaml b/models/openai/gpt-5.3-codex.yaml new file mode 100644 index 0000000..fbdd526 --- /dev/null +++ b/models/openai/gpt-5.3-codex.yaml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: api_key +model: gpt-5.3-codex +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + default: 4096 + range: + min: 16 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. + default: medium + values: + - low + - medium + - high + - xhigh + group: reasoning diff --git a/models/openai/gpt-5.4-nano.yaml b/models/openai/gpt-5.4-nano.yaml new file mode 100644 index 0000000..8d5ae1d --- /dev/null +++ b/models/openai/gpt-5.4-nano.yaml @@ -0,0 +1,25 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: api_key +model: gpt-5.4-nano +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + default: 4096 + range: + min: 16 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. 
+ default: medium + values: + - none + - low + - medium + - high + - xhigh + group: reasoning diff --git a/models/openai/gpt-5.4-pro-subscription.yaml b/models/openai/gpt-5.4-pro-subscription.yaml new file mode 100644 index 0000000..52fe618 --- /dev/null +++ b/models/openai/gpt-5.4-pro-subscription.yaml @@ -0,0 +1,36 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: subscription +model: gpt-5.4-pro +params: + - path: reasoning.effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. + default: medium + values: + - medium + - high + - xhigh + group: reasoning + - path: reasoning.summary + type: enum + label: Reasoning summary + description: Controls the level of reasoning summary returned with the response. + default: auto + values: + - auto + - concise + - detailed + - none + group: reasoning + - path: text.verbosity + type: enum + label: Verbosity + description: Controls how concise or detailed the model's final text response should be. + default: medium + values: + - low + - medium + - high + group: output_format diff --git a/models/openai/gpt-5.4-pro.yaml b/models/openai/gpt-5.4-pro.yaml new file mode 100644 index 0000000..ecc40b9 --- /dev/null +++ b/models/openai/gpt-5.4-pro.yaml @@ -0,0 +1,23 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: api_key +model: gpt-5.4-pro +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + default: 4096 + range: + min: 16 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. 
+ default: medium + values: + - medium + - high + - xhigh + group: reasoning diff --git a/models/openai/gpt-5.5-pro-subscription.yaml b/models/openai/gpt-5.5-pro-subscription.yaml new file mode 100644 index 0000000..9b22ae4 --- /dev/null +++ b/models/openai/gpt-5.5-pro-subscription.yaml @@ -0,0 +1,36 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: subscription +model: gpt-5.5-pro +params: + - path: reasoning.effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. + default: medium + values: + - medium + - high + - xhigh + group: reasoning + - path: reasoning.summary + type: enum + label: Reasoning summary + description: Controls the level of reasoning summary returned with the response. + default: auto + values: + - auto + - concise + - detailed + - none + group: reasoning + - path: text.verbosity + type: enum + label: Verbosity + description: Controls how concise or detailed the model's final text response should be. + default: medium + values: + - low + - medium + - high + group: output_format diff --git a/models/openai/gpt-5.5-pro.yaml b/models/openai/gpt-5.5-pro.yaml new file mode 100644 index 0000000..3b6e6f4 --- /dev/null +++ b/models/openai/gpt-5.5-pro.yaml @@ -0,0 +1,23 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: api_key +model: gpt-5.5-pro +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + default: 4096 + range: + min: 16 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. 
+ default: medium + values: + - medium + - high + - xhigh + group: reasoning diff --git a/models/openai/o3-pro.yaml b/models/openai/o3-pro.yaml new file mode 100644 index 0000000..432e750 --- /dev/null +++ b/models/openai/o3-pro.yaml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: openai +authType: api_key +model: o3-pro +params: + - path: max_completion_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + default: 4096 + range: + min: 16 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls how much reasoning the model should perform before producing an answer. + default: medium + values: + - low + - medium + - high + - xhigh + group: reasoning diff --git a/models/perplexity/sonar-deep-research.yaml b/models/perplexity/sonar-deep-research.yaml new file mode 100644 index 0000000..99eadd3 --- /dev/null +++ b/models/perplexity/sonar-deep-research.yaml @@ -0,0 +1,98 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: perplexity +authType: api_key +model: sonar-deep-research +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + max: 128000 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
+ range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls how much reasoning and searching the model performs before producing the report. + values: + - minimal + - low + - medium + - high + group: reasoning + - path: search_mode + type: enum + label: Search mode + description: Selects the corpus the model searches when grounding its answer. + values: + - web + - academic + - sec + group: provider_metadata + - path: search_recency_filter + type: enum + label: Search recency filter + description: Restricts web search results to a recent time window. + values: + - hour + - day + - week + - month + - year + group: provider_metadata + - path: search_domain_filter + type: string + label: Search domain filter + description: Limits search to, or excludes, specific domains. + group: provider_metadata + - path: search_after_date_filter + type: string + label: Search after date + description: Restricts search results to content published after this date (MM/DD/YYYY). + group: provider_metadata + - path: search_before_date_filter + type: string + label: Search before date + description: Restricts search results to content published before this date (MM/DD/YYYY). + group: provider_metadata + - path: web_search_options.search_context_size + type: enum + label: Search context size + description: Controls how much web search context is retrieved before generating the answer. + default: low + values: + - low + - medium + - high + group: provider_metadata + - path: return_images + type: boolean + label: Return images + description: Controls whether the response may include related images from the search. + default: false + group: provider_metadata + - path: return_related_questions + type: boolean + label: Return related questions + description: Controls whether the response includes suggested follow-up questions. 
+ default: false + group: provider_metadata diff --git a/models/perplexity/sonar-pro.yaml b/models/perplexity/sonar-pro.yaml new file mode 100644 index 0000000..cd247da --- /dev/null +++ b/models/perplexity/sonar-pro.yaml @@ -0,0 +1,94 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: perplexity +authType: api_key +model: sonar-pro +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + max: 128000 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: search_mode + type: enum + label: Search mode + description: Selects the corpus the model searches when grounding its answer. + values: + - web + - academic + - sec + group: provider_metadata + - path: search_recency_filter + type: enum + label: Search recency filter + description: Restricts web search results to a recent time window. + values: + - hour + - day + - week + - month + - year + group: provider_metadata + - path: search_domain_filter + type: string + label: Search domain filter + description: Limits search to, or excludes, specific domains. + group: provider_metadata + - path: search_after_date_filter + type: string + label: Search after date + description: Restricts search results to content published after this date (MM/DD/YYYY). + group: provider_metadata + - path: search_before_date_filter + type: string + label: Search before date + description: Restricts search results to content published before this date (MM/DD/YYYY). 
+ group: provider_metadata + - path: web_search_options.search_context_size + type: enum + label: Search context size + description: Controls how much web search context is retrieved before generating the answer. + default: low + values: + - low + - medium + - high + group: provider_metadata + - path: return_images + type: boolean + label: Return images + description: Controls whether the response may include related images from the search. + default: false + group: provider_metadata + - path: return_related_questions + type: boolean + label: Return related questions + description: Controls whether the response includes suggested follow-up questions. + default: false + group: provider_metadata + - path: disable_search + type: boolean + label: Disable search + description: Turns off web search so the model answers from its own knowledge only. + default: false + group: provider_metadata diff --git a/models/perplexity/sonar-reasoning-pro.yaml b/models/perplexity/sonar-reasoning-pro.yaml new file mode 100644 index 0000000..9725deb --- /dev/null +++ b/models/perplexity/sonar-reasoning-pro.yaml @@ -0,0 +1,94 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: perplexity +authType: api_key +model: sonar-reasoning-pro +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + max: 128000 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
+ range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: search_mode + type: enum + label: Search mode + description: Selects the corpus the model searches when grounding its answer. + values: + - web + - academic + - sec + group: provider_metadata + - path: search_recency_filter + type: enum + label: Search recency filter + description: Restricts web search results to a recent time window. + values: + - hour + - day + - week + - month + - year + group: provider_metadata + - path: search_domain_filter + type: string + label: Search domain filter + description: Limits search to, or excludes, specific domains. + group: provider_metadata + - path: search_after_date_filter + type: string + label: Search after date + description: Restricts search results to content published after this date (MM/DD/YYYY). + group: provider_metadata + - path: search_before_date_filter + type: string + label: Search before date + description: Restricts search results to content published before this date (MM/DD/YYYY). + group: provider_metadata + - path: web_search_options.search_context_size + type: enum + label: Search context size + description: Controls how much web search context is retrieved before generating the answer. + default: low + values: + - low + - medium + - high + group: provider_metadata + - path: return_images + type: boolean + label: Return images + description: Controls whether the response may include related images from the search. + default: false + group: provider_metadata + - path: return_related_questions + type: boolean + label: Return related questions + description: Controls whether the response includes suggested follow-up questions. + default: false + group: provider_metadata + - path: disable_search + type: boolean + label: Disable search + description: Turns off web search so the model answers from its own knowledge only. 
+ default: false + group: provider_metadata diff --git a/models/perplexity/sonar.yaml b/models/perplexity/sonar.yaml new file mode 100644 index 0000000..0b7c9f2 --- /dev/null +++ b/models/perplexity/sonar.yaml @@ -0,0 +1,94 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: perplexity +authType: api_key +model: sonar +params: + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of output tokens the model may generate. + range: + min: 1 + max: 128000 + group: generation_length + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + range: + min: 0 + max: 2 + step: 0.1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. + range: + min: 0 + max: 1 + step: 0.01 + group: sampling + - path: search_mode + type: enum + label: Search mode + description: Selects the corpus the model searches when grounding its answer. + values: + - web + - academic + - sec + group: provider_metadata + - path: search_recency_filter + type: enum + label: Search recency filter + description: Restricts web search results to a recent time window. + values: + - hour + - day + - week + - month + - year + group: provider_metadata + - path: search_domain_filter + type: string + label: Search domain filter + description: Limits search to, or excludes, specific domains. + group: provider_metadata + - path: search_after_date_filter + type: string + label: Search after date + description: Restricts search results to content published after this date (MM/DD/YYYY). + group: provider_metadata + - path: search_before_date_filter + type: string + label: Search before date + description: Restricts search results to content published before this date (MM/DD/YYYY). 
+ group: provider_metadata + - path: web_search_options.search_context_size + type: enum + label: Search context size + description: Controls how much web search context is retrieved before generating the answer. + default: low + values: + - low + - medium + - high + group: provider_metadata + - path: return_images + type: boolean + label: Return images + description: Controls whether the response may include related images from the search. + default: false + group: provider_metadata + - path: return_related_questions + type: boolean + label: Return related questions + description: Controls whether the response includes suggested follow-up questions. + default: false + group: provider_metadata + - path: disable_search + type: boolean + label: Disable search + description: Turns off web search so the model answers from its own knowledge only. + default: false + group: provider_metadata diff --git a/src/client/logos/meta.svg b/src/client/logos/meta.svg new file mode 100644 index 0000000..3053b25 --- /dev/null +++ b/src/client/logos/meta.svg @@ -0,0 +1,3 @@ + + +