Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,69 +27,6 @@ Native integration uses environment-specific YAML configuration files to define
```yaml
# Configuration file for managing multiple LLM and embedding models
llm_models:
- base_name: "claude-3-5-sonnet"
deployment_name: "anthropic.claude-3-5-sonnet-20240620-v1:0"
label: "Bedrock Claude 3.5 Sonnet"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 4096
cost:
input: 0.000003
output: 0.000015

- base_name: "claude-3-5-sonnet-v2"
deployment_name: "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
label: "Bedrock Claude 3.5 Sonnet v2 20241022"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 8192
cost:
input: 0.000003
output: 0.000015
cache_read_input_token_cost: 0.0000003

- base_name: "claude-3-7"
deployment_name: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
label: "Bedrock Claude 3.7 Sonnet v1"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 8192
cost:
input: 0.000003
output: 0.000015
cache_read_input_token_cost: 0.0000003

- base_name: "claude-4-sonnet"
deployment_name: "us.anthropic.claude-sonnet-4-20250514-v1:0"
label: "Bedrock Claude 4 Sonnet"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 32000
cost:
input: 0.000003
output: 0.000015
cache_read_input_token_cost: 0.0000003

- base_name: "claude-4-sonnet-1m"
deployment_name: "us.anthropic.claude-sonnet-4-20250514-v1:0"
label: "Bedrock Claude 4 Sonnet Long Context"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 32000
cost:
input: 0.000003
output: 0.000015
cache_read_input_token_cost: 0.0000003
configuration:
client_headers:
anthropic_beta:
- context-1m-2025-08-07

- base_name: "claude-4-5-haiku"
deployment_name: "us.anthropic.claude-haiku-4-5-20251001-v1:0"
label: "Bedrock Claude 4.5 Haiku"
Expand Down Expand Up @@ -131,30 +68,6 @@ llm_models:
output: 0.00000097
cache_read_input_token_cost: 0.00000024

- base_name: "claude-4-opus"
deployment_name: "us.anthropic.claude-opus-4-20250514-v1:0"
label: "Bedrock Claude 4 Opus"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 32000
cost:
input: 0.000015
output: 0.000075
cache_read_input_token_cost: 0.0000015

- base_name: "claude-4-1-opus"
deployment_name: "us.anthropic.claude-opus-4-1-20250805-v1:0"
label: "Bedrock Claude 4.1 Opus v1"
multimodal: true
enabled: true
provider: "aws_bedrock"
max_output_tokens: 32000
cost:
input: 0.000015
output: 0.000075
cache_read_input_token_cost: 0.0000015

- base_name: "claude-opus-4-5-20251101"
deployment_name: "us.anthropic.claude-opus-4-5-20251101-v1:0"
label: "Bedrock Claude Opus 4.5"
Expand Down Expand Up @@ -190,43 +103,6 @@ embeddings_models:
# https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/model-retirements
llm_models:

- base_name: "gpt-4o-2024-08-06"
deployment_name: "gpt-4o-2024-08-06"
label: "GPT-4o 2024-08-06"
multimodal: true
enabled: true
provider: "azure_openai"
max_output_tokens: 16383
cost:
input: 0.0000025
output: 0.00001
cache_read_input_token_cost: 0.00000125

- base_name: "gpt-4o-2024-11-20"
deployment_name: "gpt-4o-2024-11-20"
label: "GPT-4o 2024-11-20"
multimodal: true
default_for_categories: [global]
enabled: true
provider: "azure_openai"
max_output_tokens: 16383
cost:
input: 0.0000025
output: 0.00001
cache_read_input_token_cost: 0.00000125

- base_name: "gpt-4o-mini"
deployment_name: "gpt-4o-mini-2024-07-18"
label: "GPT-4o-mini-2024-07-18"
multimodal: true
enabled: true
provider: "azure_openai"
max_output_tokens: 16383
cost:
input: 0.000000165
output: 0.00000066
cache_read_input_token_cost: 0.000000075

- base_name: "gpt-4.1"
deployment_name: "gpt-4.1-2025-04-14"
label: "GPT-4.1 2025-04-14"
Expand Down Expand Up @@ -416,68 +292,6 @@ embeddings_models:
# Configuration file for managing multiple LLM and embedding models
llm_models:
# Ref. https://cloud.google.com/vertex-ai/generative-ai/pricing#token-based-pricing
- base_name: "gemini-2.0-flash"
deployment_name: "gemini-2.0-flash-001"
label: "Gemini 2.0 Flash"
multimodal: true
enabled: true
provider: "google_vertexai"
max_output_tokens: 8192
cost:
input: 0.00000015
output: 0.0000006
cache_read_input_token_cost: 0.0000000375

- base_name: "gemini-2.5-flash"
deployment_name: "gemini-2.5-flash"
label: "Gemini 2.5 Flash"
multimodal: true
enabled: true
provider: "google_vertexai"
max_output_tokens: 65535
cost:
input: 0.00000030
output: 0.0000025
cache_read_input_token_cost: 0.000000075

- base_name: "gemini-2.5-pro"
deployment_name: "gemini-2.5-pro"
label: "Gemini 2.5 Pro"
multimodal: true
enabled: true
default_for_categories: [global]
provider: "google_vertexai"
max_output_tokens: 65535
cost:
input: 0.00000125
output: 0.00001
cache_read_input_token_cost: 0.000000313

- base_name: "claude-sonnet-v2-vertex"
deployment_name: "claude-3-5-sonnet-v2"
label: "VertexAI Claude Sonnet 3.5 v2"
multimodal: false
enabled: true
default: false
provider: "google_vertexai"
max_output_tokens: 8192
cost:
input: 0.000003
output: 0.000015
cache_read_input_token_cost: 0.0000003

- base_name: "claude-sonnet-3-7-vertex"
deployment_name: "claude-3-7-sonnet"
label: "VertexAI Claude Sonnet 3.7"
multimodal: false
enabled: true
default: false
provider: "google_vertexai"
max_output_tokens: 8192
cost:
input: 0.000003
output: 0.000015
cache_read_input_token_cost: 0.0000003

embeddings_models:

Expand All @@ -502,35 +316,35 @@ Create a custom model configuration YAML file with your LLM and embedding models

```yaml
llm_models:
- base_name: "gpt-4o-2024-11-20"
deployment_name: "gpt-4o-2024-11-20"
label: "GPT-4o (Nov 2024)"
- base_name: "gpt-4.1"
deployment_name: "gpt-4.1-2025-04-14"
label: "GPT-4.1 2025-04-14"
multimodal: true
enabled: true
provider: "azure_openai"
default_for_categories: [global]
max_output_tokens: 16383
max_output_tokens: 32768
cost:
input: 0.0000025
output: 0.00001
cache_read_input_token_cost: 0.00000125
input: 0.000002
output: 0.000008
cache_read_input_token_cost: 0.0000005

- base_name: "gpt-4o-mini"
deployment_name: "gpt-4o-mini-2024-07-18"
label: "GPT-4o Mini (Jul 2024)"
- base_name: "gpt-4.1-mini"
deployment_name: "gpt-4.1-mini-2025-04-14"
label: "GPT-4.1 mini 2025-04-14"
multimodal: true
enabled: true
provider: "azure_openai"
max_output_tokens: 16383
max_output_tokens: 32768
cost:
input: 0.000000165
output: 0.00000066
cache_read_input_token_cost: 0.000000075
input: 0.0000004
output: 0.0000016
cache_read_input_token_cost: 0.0000001

- base_name: "claude-4-sonnet"
deployment_name: "anthropic.claude-4-sonnet-v1"
label: "Claude 4 Sonnet"
multimodal: false
- base_name: "claude-4-5-sonnet"
deployment_name: "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
label: "Claude 4.5 Sonnet"
multimodal: true
enabled: true
provider: "aws_bedrock"
cost:
Expand Down Expand Up @@ -582,7 +396,7 @@ extraObjects:
llm-production-config.yaml: |
# Paste your model configuration here
llm_models:
- base_name: "gpt-4o-2024-11-20"
- base_name: "gpt-4.1"
# ... (model config from Step 1)
```

Expand Down
Loading