From f73041e225ab49873a5295bd1d479eafde5cb5cd Mon Sep 17 00:00:00 2001 From: kernelalex <188699471+kernelalex@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:57:55 -0400 Subject: [PATCH 1/2] Opus 4.7 Support Add Bedrock adaptive thinking support for Claude Opus 4.7 --- backend/pkg/database/converter/converter.go | 9 +- backend/pkg/graph/generated.go | 78 +++++- backend/pkg/graph/model/models_gen.go | 48 +++- backend/pkg/graph/schema.graphqls | 12 +- .../providers/bedrock/adaptive_thinking.go | 173 +++++++++++++ .../bedrock/adaptive_thinking_test.go | 46 ++++ backend/pkg/providers/bedrock/bedrock.go | 15 +- backend/pkg/providers/bedrock/config.yml | 3 +- backend/pkg/providers/bedrock/models.yml | 11 + backend/pkg/providers/pconfig/config.go | 48 +++- backend/pkg/providers/pconfig/config_test.go | 46 ++++ frontend/graphql-schema.graphql | 1 + frontend/src/graphql/types.ts | 12 +- .../src/pages/settings/settings-provider.tsx | 245 +++++++++++++----- 14 files changed, 670 insertions(+), 77 deletions(-) create mode 100644 backend/pkg/providers/bedrock/adaptive_thinking.go create mode 100644 backend/pkg/providers/bedrock/adaptive_thinking_test.go diff --git a/backend/pkg/database/converter/converter.go b/backend/pkg/database/converter/converter.go index 256c0cbe..fa9c5aa3 100644 --- a/backend/pkg/database/converter/converter.go +++ b/backend/pkg/database/converter/converter.go @@ -613,9 +613,13 @@ func ConvertAgentConfigToGqlModel(ac *pconfig.AgentConfig) *model.AgentConfig { result.PresencePenalty = &ac.PresencePenalty } - if ac.Reasoning.Effort != llms.ReasoningNone || ac.Reasoning.MaxTokens != 0 { + if !ac.Reasoning.IsZero() { reasoning := &model.ReasoningConfig{} + if ac.Reasoning.Mode != pconfig.ReasoningModeDefault { + mode := model.ReasoningMode(ac.Reasoning.Mode) + reasoning.Mode = &mode + } if ac.Reasoning.Effort != llms.ReasoningNone { effort := model.ReasoningEffort(ac.Reasoning.Effort) reasoning.Effort = &effort @@ -708,6 +712,9 @@ func 
ConvertAgentConfigFromGqlModel(ac *model.AgentConfig) *pconfig.AgentConfig if ac.Reasoning != nil { reasoning := map[string]any{} + if ac.Reasoning.Mode != nil { + reasoning["mode"] = pconfig.ReasoningMode(*ac.Reasoning.Mode) + } if ac.Reasoning.Effort != nil { reasoning["effort"] = llms.ReasoningEffort(*ac.Reasoning.Effort) } diff --git a/backend/pkg/graph/generated.go b/backend/pkg/graph/generated.go index 2104fc72..45c9f277 100644 --- a/backend/pkg/graph/generated.go +++ b/backend/pkg/graph/generated.go @@ -458,6 +458,7 @@ type ComplexityRoot struct { ReasoningConfig struct { Effort func(childComplexity int) int MaxTokens func(childComplexity int) int + Mode func(childComplexity int) int } Screenshot struct { @@ -3010,6 +3011,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.ReasoningConfig.MaxTokens(childComplexity), true + case "ReasoningConfig.mode": + if e.complexity.ReasoningConfig.Mode == nil { + break + } + + return e.complexity.ReasoningConfig.Mode(childComplexity), true + case "Screenshot.createdAt": if e.complexity.Screenshot.CreatedAt == nil { break @@ -8202,6 +8210,8 @@ func (ec *executionContext) fieldContext_AgentConfig_reasoning(_ context.Context IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { switch field.Name { + case "mode": + return ec.fieldContext_ReasoningConfig_mode(ctx, field) case "effort": return ec.fieldContext_ReasoningConfig_effort(ctx, field) case "maxTokens": @@ -21743,6 +21753,47 @@ func (ec *executionContext) fieldContext_Query___schema(_ context.Context, field return fc, nil } +func (ec *executionContext) _ReasoningConfig_mode(ctx context.Context, field graphql.CollectedField, obj *model.ReasoningConfig) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_ReasoningConfig_mode(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := 
recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Mode, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(*model.ReasoningMode) + fc.Result = res + return ec.marshalOReasoningMode2ᚖpentagiᚋpkgᚋgraphᚋmodelᚐReasoningMode(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_ReasoningConfig_mode(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "ReasoningConfig", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type ReasoningMode does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _ReasoningConfig_effort(ctx context.Context, field graphql.CollectedField, obj *model.ReasoningConfig) (ret graphql.Marshaler) { fc, err := ec.fieldContext_ReasoningConfig_effort(ctx, field) if err != nil { @@ -30891,13 +30942,20 @@ func (ec *executionContext) unmarshalInputReasoningConfigInput(ctx context.Conte asMap[k] = v } - fieldsInOrder := [...]string{"effort", "maxTokens"} + fieldsInOrder := [...]string{"mode", "effort", "maxTokens"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { continue } switch k { + case "mode": + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("mode")) + data, err := ec.unmarshalOReasoningMode2ᚖpentagiᚋpkgᚋgraphᚋmodelᚐReasoningMode(ctx, v) + if err != nil { + return it, err + } + it.Mode = data case "effort": ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("effort")) data, err := ec.unmarshalOReasoningEffort2ᚖpentagiᚋpkgᚋgraphᚋmodelᚐReasoningEffort(ctx, v) @@ 
-34309,6 +34367,8 @@ func (ec *executionContext) _ReasoningConfig(ctx context.Context, sel ast.Select switch field.Name { case "__typename": out.Values[i] = graphql.MarshalString("ReasoningConfig") + case "mode": + out.Values[i] = ec._ReasoningConfig_mode(ctx, field, obj) case "effort": out.Values[i] = ec._ReasoningConfig_effort(ctx, field, obj) case "maxTokens": @@ -38131,6 +38191,22 @@ func (ec *executionContext) marshalOReasoningEffort2ᚖpentagiᚋpkgᚋgraphᚋm return v } +func (ec *executionContext) unmarshalOReasoningMode2ᚖpentagiᚋpkgᚋgraphᚋmodelᚐReasoningMode(ctx context.Context, v interface{}) (*model.ReasoningMode, error) { + if v == nil { + return nil, nil + } + var res = new(model.ReasoningMode) + err := res.UnmarshalGQL(v) + return res, graphql.ErrorOnPath(ctx, err) +} + +func (ec *executionContext) marshalOReasoningMode2ᚖpentagiᚋpkgᚋgraphᚋmodelᚐReasoningMode(ctx context.Context, sel ast.SelectionSet, v *model.ReasoningMode) graphql.Marshaler { + if v == nil { + return graphql.Null + } + return v +} + func (ec *executionContext) marshalOScreenshot2ᚕᚖpentagiᚋpkgᚋgraphᚋmodelᚐScreenshotᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.Screenshot) graphql.Marshaler { if v == nil { return graphql.Null diff --git a/backend/pkg/graph/model/models_gen.go b/backend/pkg/graph/model/models_gen.go index 35a09dcd..caf102a7 100644 --- a/backend/pkg/graph/model/models_gen.go +++ b/backend/pkg/graph/model/models_gen.go @@ -362,6 +362,7 @@ type Query struct { } type ReasoningConfig struct { + Mode *ReasoningMode `json:"mode,omitempty"` Effort *ReasoningEffort `json:"effort,omitempty"` MaxTokens *int `json:"maxTokens,omitempty"` } @@ -946,12 +947,16 @@ func (e ProviderType) MarshalGQL(w io.Writer) { type ReasoningEffort string const ( + ReasoningEffortXhigh ReasoningEffort = "xhigh" + ReasoningEffortMax ReasoningEffort = "max" ReasoningEffortHigh ReasoningEffort = "high" ReasoningEffortMedium ReasoningEffort = "medium" ReasoningEffortLow ReasoningEffort = "low" ) var 
AllReasoningEffort = []ReasoningEffort{ + ReasoningEffortXhigh, + ReasoningEffortMax, ReasoningEffortHigh, ReasoningEffortMedium, ReasoningEffortLow, @@ -959,7 +964,7 @@ var AllReasoningEffort = []ReasoningEffort{ func (e ReasoningEffort) IsValid() bool { switch e { - case ReasoningEffortHigh, ReasoningEffortMedium, ReasoningEffortLow: + case ReasoningEffortXhigh, ReasoningEffortMax, ReasoningEffortHigh, ReasoningEffortMedium, ReasoningEffortLow: return true } return false @@ -986,6 +991,47 @@ func (e ReasoningEffort) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } +type ReasoningMode string + +const ( + ReasoningModeAdaptive ReasoningMode = "adaptive" + ReasoningModeBudget ReasoningMode = "budget" +) + +var AllReasoningMode = []ReasoningMode{ + ReasoningModeAdaptive, + ReasoningModeBudget, +} + +func (e ReasoningMode) IsValid() bool { + switch e { + case ReasoningModeAdaptive, ReasoningModeBudget: + return true + } + return false +} + +func (e ReasoningMode) String() string { + return string(e) +} + +func (e *ReasoningMode) UnmarshalGQL(v interface{}) error { + str, ok := v.(string) + if !ok { + return fmt.Errorf("enums must be strings") + } + + *e = ReasoningMode(str) + if !e.IsValid() { + return fmt.Errorf("%s is not a valid ReasoningMode", str) + } + return nil +} + +func (e ReasoningMode) MarshalGQL(w io.Writer) { + fmt.Fprint(w, strconv.Quote(e.String())) +} + type ResultFormat string const ( diff --git a/backend/pkg/graph/schema.graphqls b/backend/pkg/graph/schema.graphqls index e5c72def..a2fd7099 100644 --- a/backend/pkg/graph/schema.graphqls +++ b/backend/pkg/graph/schema.graphqls @@ -23,13 +23,21 @@ enum ProviderType { qwen } -# Reasoning effort levels for advanced AI models (OpenAI format) +# Reasoning effort levels for advanced AI models enum ReasoningEffort { + xhigh + max high medium low } +# Reasoning control mode for provider-specific thinking APIs +enum ReasoningMode { + adaptive + budget +} + # Template types for AI agent 
prompts and system operations enum PromptType { primary_agent @@ -699,6 +707,7 @@ type ProviderConfig { # AI model reasoning configuration type ReasoningConfig { + mode: ReasoningMode effort: ReasoningEffort maxTokens: Int } @@ -748,6 +757,7 @@ type AgentsConfig { # Input type for ReasoningConfig input ReasoningConfigInput { + mode: ReasoningMode effort: ReasoningEffort maxTokens: Int } diff --git a/backend/pkg/providers/bedrock/adaptive_thinking.go b/backend/pkg/providers/bedrock/adaptive_thinking.go new file mode 100644 index 00000000..a8befd64 --- /dev/null +++ b/backend/pkg/providers/bedrock/adaptive_thinking.go @@ -0,0 +1,173 @@ +package bedrock + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "strconv" + + "pentagi/pkg/providers/pconfig" + + "github.com/aws/smithy-go/middleware" + smithyhttp "github.com/aws/smithy-go/transport/http" + "github.com/vxcontrol/langchaingo/llms" +) + +type adaptiveThinkingEffortContextKey struct{} + +func withAdaptiveThinkingEffort(ctx context.Context, effort string) context.Context { + if effort == "" { + effort = string(llms.ReasoningHigh) + } + + return context.WithValue(ctx, adaptiveThinkingEffortContextKey{}, effort) +} + +func adaptiveThinkingEffortFromContext(ctx context.Context) (string, bool) { + effort, ok := ctx.Value(adaptiveThinkingEffortContextKey{}).(string) + return effort, ok && effort != "" +} + +func addAdaptiveThinkingMiddleware(stack *middleware.Stack) error { + return stack.Build.Add(middleware.BuildMiddlewareFunc( + "PentAGIBedrockAdaptiveThinking", + func(ctx context.Context, in middleware.BuildInput, next middleware.BuildHandler) ( + middleware.BuildOutput, + middleware.Metadata, + error, + ) { + effort, ok := adaptiveThinkingEffortFromContext(ctx) + if !ok { + return next.HandleBuild(ctx, in) + } + + req, ok := in.Request.(*smithyhttp.Request) + if !ok || req.GetStream() == nil { + return next.HandleBuild(ctx, in) + } + + body, err := io.ReadAll(req.GetStream()) + if err != nil { + 
return middleware.BuildOutput{}, middleware.Metadata{}, fmt.Errorf("read Bedrock request body: %w", err) + } + + updatedBody, err := rewriteAdaptiveThinkingBody(body, effort) + if err != nil { + return middleware.BuildOutput{}, middleware.Metadata{}, err + } + + updatedReq, err := req.SetStream(bytes.NewReader(updatedBody)) + if err != nil { + return middleware.BuildOutput{}, middleware.Metadata{}, fmt.Errorf("replace Bedrock request body: %w", err) + } + updatedReq.ContentLength = int64(len(updatedBody)) + updatedReq.Header.Set("Content-Length", strconv.Itoa(len(updatedBody))) + in.Request = updatedReq + + return next.HandleBuild(ctx, in) + }, + ), middleware.After) +} + +func rewriteAdaptiveThinkingBody(body []byte, effort string) ([]byte, error) { + // langchaingo currently emits budget-based Anthropic thinking for Bedrock; + // Claude adaptive thinking expects thinking.type=adaptive plus output_config.effort. + var payload map[string]any + if err := json.Unmarshal(body, &payload); err != nil { + return nil, fmt.Errorf("decode Bedrock request body: %w", err) + } + + fields, ok := payload["additionalModelRequestFields"].(map[string]any) + if !ok { + return body, nil + } + + thinking, ok := fields["thinking"].(map[string]any) + if !ok { + return body, nil + } + + thinking["type"] = "adaptive" + delete(thinking, "budget_tokens") + fields["output_config"] = map[string]any{ + "effort": effort, + } + + updatedBody, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("encode Bedrock request body: %w", err) + } + + return updatedBody, nil +} + +func (p *bedrockProvider) prepareCallOptions( + ctx context.Context, + opt pconfig.ProviderOptionsType, + options []llms.CallOption, +) (context.Context, []llms.CallOption) { + reasoning, ok := p.reasoningConfigForType(opt) + if !ok || reasoning.EffectiveMode() != pconfig.ReasoningModeAdaptive { + return ctx, options + } + + effort := string(reasoning.Effort) + ctx = withAdaptiveThinkingEffort(ctx, effort) + 
options = append(options, llms.WithReasoning(llms.ReasoningHigh, 0)) + + return ctx, options +} + +func (p *bedrockProvider) reasoningConfigForType(opt pconfig.ProviderOptionsType) (pconfig.ReasoningConfig, bool) { + agentConfig := p.agentConfigForType(opt) + if agentConfig == nil || agentConfig.Reasoning.IsZero() { + return pconfig.ReasoningConfig{}, false + } + + return agentConfig.Reasoning, true +} + +func (p *bedrockProvider) agentConfigForType(opt pconfig.ProviderOptionsType) *pconfig.AgentConfig { + if p == nil || p.providerConfig == nil { + return nil + } + + switch opt { + case pconfig.OptionsTypeSimple: + return p.providerConfig.Simple + case pconfig.OptionsTypeSimpleJSON: + if p.providerConfig.SimpleJSON != nil { + return p.providerConfig.SimpleJSON + } + return p.providerConfig.Simple + case pconfig.OptionsTypePrimaryAgent: + return p.providerConfig.PrimaryAgent + case pconfig.OptionsTypeAssistant: + if p.providerConfig.Assistant != nil { + return p.providerConfig.Assistant + } + return p.providerConfig.PrimaryAgent + case pconfig.OptionsTypeGenerator: + return p.providerConfig.Generator + case pconfig.OptionsTypeRefiner: + return p.providerConfig.Refiner + case pconfig.OptionsTypeAdviser: + return p.providerConfig.Adviser + case pconfig.OptionsTypeReflector: + return p.providerConfig.Reflector + case pconfig.OptionsTypeSearcher: + return p.providerConfig.Searcher + case pconfig.OptionsTypeEnricher: + return p.providerConfig.Enricher + case pconfig.OptionsTypeCoder: + return p.providerConfig.Coder + case pconfig.OptionsTypeInstaller: + return p.providerConfig.Installer + case pconfig.OptionsTypePentester: + return p.providerConfig.Pentester + default: + return nil + } +} diff --git a/backend/pkg/providers/bedrock/adaptive_thinking_test.go b/backend/pkg/providers/bedrock/adaptive_thinking_test.go new file mode 100644 index 00000000..2bbbd4e1 --- /dev/null +++ b/backend/pkg/providers/bedrock/adaptive_thinking_test.go @@ -0,0 +1,46 @@ +package bedrock + 
+import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRewriteAdaptiveThinkingBody(t *testing.T) { + body := []byte(`{ + "additionalModelRequestFields": { + "thinking": { + "type": "enabled", + "budget_tokens": 4096 + } + }, + "inferenceConfig": { + "maxTokens": 16384 + } + }`) + + updatedBody, err := rewriteAdaptiveThinkingBody(body, "xhigh") + require.NoError(t, err) + + var payload map[string]any + require.NoError(t, json.Unmarshal(updatedBody, &payload)) + + fields := payload["additionalModelRequestFields"].(map[string]any) + thinking := fields["thinking"].(map[string]any) + outputConfig := fields["output_config"].(map[string]any) + + assert.Equal(t, "adaptive", thinking["type"]) + assert.NotContains(t, thinking, "budget_tokens") + assert.Equal(t, "xhigh", outputConfig["effort"]) + assert.Equal(t, map[string]any{"maxTokens": float64(16384)}, payload["inferenceConfig"]) +} + +func TestRewriteAdaptiveThinkingBodyWithoutThinking(t *testing.T) { + body := []byte(`{"inferenceConfig":{"maxTokens":1024}}`) + + updatedBody, err := rewriteAdaptiveThinkingBody(body, "high") + require.NoError(t, err) + assert.JSONEq(t, string(body), string(updatedBody)) +} diff --git a/backend/pkg/providers/bedrock/bedrock.go b/backend/pkg/providers/bedrock/bedrock.go index 0c325276..1d9f4425 100644 --- a/backend/pkg/providers/bedrock/bedrock.go +++ b/backend/pkg/providers/bedrock/bedrock.go @@ -19,6 +19,7 @@ import ( "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" smithybearer "github.com/aws/smithy-go/auth/bearer" + "github.com/aws/smithy-go/middleware" "github.com/invopop/jsonschema" "github.com/vxcontrol/langchaingo/llms" "github.com/vxcontrol/langchaingo/llms/bedrock" @@ -84,6 +85,9 @@ func New( ) (provider.Provider, error) { opts := []func(*bconfig.LoadOptions) error{ bconfig.WithRegion(cfg.BedrockRegion), + 
bconfig.WithAPIOptions([]func(*middleware.Stack) error{ + addAdaptiveThinkingMiddleware, + }), } // Choose authentication strategy based on configuration @@ -195,9 +199,10 @@ func (p *bedrockProvider) Call( opt pconfig.ProviderOptionsType, prompt string, ) (string, error) { + ctx, options := p.prepareCallOptions(ctx, opt, p.providerConfig.GetOptionsForType(opt)) + return provider.WrapGenerateFromSinglePrompt( - ctx, p, opt, p.llm, prompt, - p.providerConfig.GetOptionsForType(opt)..., + ctx, p, opt, p.llm, prompt, options..., ) } @@ -223,10 +228,11 @@ func (p *bedrockProvider) CallEx( // Clean tools from $schema field tools = cleanToolSchemas(tools) - // Build final options: streaming + config + cleaned tools LAST (to override any dirty tools from config) + // Put cleaned tools after config to override any dirty tools restored from config. options := []llms.CallOption{llms.WithStreamingFunc(streamCb)} options = append(options, configOptions...) options = append(options, llms.WithTools(tools)) + ctx, options = p.prepareCallOptions(ctx, opt, options) return provider.WrapGenerateContent(ctx, p, opt, p.llm.GenerateContent, chain, options...) } @@ -249,8 +255,9 @@ func (p *bedrockProvider) CallWithTools( configOptions := p.providerConfig.GetOptionsForType(opt) - // Build final options: config + streaming + cleaned tools LAST (to override any dirty tools from config) + // Put cleaned tools after config to override any dirty tools restored from config. options := append(configOptions, llms.WithStreamingFunc(streamCb), llms.WithTools(tools)) + ctx, options = p.prepareCallOptions(ctx, opt, options) return provider.WrapGenerateContent(ctx, p, opt, p.llm.GenerateContent, chain, options...) 
} diff --git a/backend/pkg/providers/bedrock/config.yml b/backend/pkg/providers/bedrock/config.yml index dc877e50..9848cc4c 100644 --- a/backend/pkg/providers/bedrock/config.yml +++ b/backend/pkg/providers/bedrock/config.yml @@ -83,7 +83,8 @@ adviser: n: 1 max_tokens: 16384 reasoning: - max_tokens: 4096 + mode: adaptive + effort: high price: input: 5.0 output: 25.0 diff --git a/backend/pkg/providers/bedrock/models.yml b/backend/pkg/providers/bedrock/models.yml index 20f3c706..b656f95e 100644 --- a/backend/pkg/providers/bedrock/models.yml +++ b/backend/pkg/providers/bedrock/models.yml @@ -39,6 +39,17 @@ input: 0.035 output: 0.14 +# Anthropic Claude 4.7 Series - Latest Opus generation with adaptive thinking +- name: us.anthropic.claude-opus-4-7 + description: Most capable Opus model for advanced software engineering, long-running agentic tasks, professional work, and rigorous security analysis with adaptive thinking + thinking: true + release_date: 2026-04-16 + price: + input: 5.0 + output: 25.0 + cache_read: 0.5 + cache_write: 6.25 + # Anthropic Claude 4.6 Series - Latest generation with world-class coding and agentic capabilities - name: us.anthropic.claude-opus-4-6-v1 description: World's best model for coding, enterprise agents, and professional work with industry-leading reliability for agentic workflows and security analysis diff --git a/backend/pkg/providers/pconfig/config.go b/backend/pkg/providers/pconfig/config.go index eaf4e0a0..8d5c4dfc 100644 --- a/backend/pkg/providers/pconfig/config.go +++ b/backend/pkg/providers/pconfig/config.go @@ -185,11 +185,36 @@ type PriceInfo struct { CacheWrite float64 `json:"cache_write,omitempty" yaml:"cache_write,omitempty"` } +type ReasoningMode string + +const ( + ReasoningModeDefault ReasoningMode = "" + ReasoningModeAdaptive ReasoningMode = "adaptive" + ReasoningModeBudget ReasoningMode = "budget" +) + type ReasoningConfig struct { + Mode ReasoningMode `json:"mode,omitempty" yaml:"mode,omitempty"` Effort 
llms.ReasoningEffort `json:"effort,omitempty" yaml:"effort,omitempty"` MaxTokens int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"` } +func (rc ReasoningConfig) IsZero() bool { + return rc.Mode == ReasoningModeDefault && + rc.Effort == llms.ReasoningNone && + rc.MaxTokens == 0 +} + +func (rc ReasoningConfig) EffectiveMode() ReasoningMode { + if rc.Mode != ReasoningModeDefault { + return rc.Mode + } + if rc.MaxTokens != 0 && (rc.Effort == llms.ReasoningNone || rc.Effort == llms.ReasoningEffort("none")) { + return ReasoningModeBudget + } + return ReasoningModeDefault +} + // AgentConfig represents the configuration for a single agent type AgentConfig struct { Model string `json:"model,omitempty" yaml:"model,omitempty"` @@ -572,14 +597,25 @@ func (ac *AgentConfig) BuildOptions() []llms.CallOption { if _, ok := ac.raw["response_mime_type"]; ok && ac.ResponseMIMEType != "" { options = append(options, llms.WithResponseMIMEType(ac.ResponseMIMEType)) } - if _, ok := ac.raw["reasoning"]; ok && (ac.Reasoning.Effort != llms.ReasoningNone || ac.Reasoning.MaxTokens != 0) { - switch ac.Reasoning.Effort { - case llms.ReasoningLow, llms.ReasoningMedium, llms.ReasoningHigh: - options = append(options, llms.WithReasoning(ac.Reasoning.Effort, 0)) - default: + if _, ok := ac.raw["reasoning"]; ok && !ac.Reasoning.IsZero() { + switch ac.Reasoning.EffectiveMode() { + case ReasoningModeAdaptive: + metadata := map[string]any{ + "reasoning_mode": string(ReasoningModeAdaptive), + } + if ac.Reasoning.Effort != llms.ReasoningNone { + metadata["reasoning_effort"] = string(ac.Reasoning.Effort) + } + options = append(options, llms.WithMetadata(metadata)) + case ReasoningModeBudget: if ac.Reasoning.MaxTokens > 0 && ac.Reasoning.MaxTokens <= 32000 { options = append(options, llms.WithReasoning(llms.ReasoningNone, ac.Reasoning.MaxTokens)) } + default: + switch ac.Reasoning.Effort { + case llms.ReasoningLow, llms.ReasoningMedium, llms.ReasoningHigh: + options = append(options, 
llms.WithReasoning(ac.Reasoning.Effort, 0)) + } } } if _, ok := ac.raw["extra_body"]; ok && ac.ExtraBody != nil { @@ -643,7 +679,7 @@ func (ac *AgentConfig) marshalMap() map[string]any { if ac.ResponseMIMEType != "" { output["response_mime_type"] = ac.ResponseMIMEType } - if ac.Reasoning.Effort != llms.ReasoningNone || ac.Reasoning.MaxTokens != 0 { + if !ac.Reasoning.IsZero() { output["reasoning"] = ac.Reasoning } if ac.Price != nil { diff --git a/backend/pkg/providers/pconfig/config_test.go b/backend/pkg/providers/pconfig/config_test.go index e899de7b..c6ee170a 100644 --- a/backend/pkg/providers/pconfig/config_test.go +++ b/backend/pkg/providers/pconfig/config_test.go @@ -47,6 +47,14 @@ func TestReasoningConfig_UnmarshalJSON(t *testing.T) { MaxTokens: 2000, }, }, + { + name: "with adaptive mode", + json: `{"mode": "adaptive", "effort": "xhigh"}`, + want: ReasoningConfig{ + Mode: ReasoningModeAdaptive, + Effort: llms.ReasoningEffort("xhigh"), + }, + }, { name: "invalid json", json: "{invalid}", @@ -65,6 +73,7 @@ func TestReasoningConfig_UnmarshalJSON(t *testing.T) { } require.NoError(t, err) + assert.Equal(t, tt.want.Mode, got.Mode) assert.Equal(t, tt.want.Effort, got.Effort) assert.Equal(t, tt.want.MaxTokens, got.MaxTokens) }) @@ -108,6 +117,17 @@ max_tokens: 2000 MaxTokens: 2000, }, }, + { + name: "with adaptive mode", + yaml: ` +mode: adaptive +effort: xhigh +`, + want: ReasoningConfig{ + Mode: ReasoningModeAdaptive, + Effort: llms.ReasoningEffort("xhigh"), + }, + }, { name: "invalid yaml", yaml: "invalid: [yaml", @@ -126,6 +146,7 @@ max_tokens: 2000 } require.NoError(t, err) + assert.Equal(t, tt.want.Mode, got.Mode) assert.Equal(t, tt.want.Effort, got.Effort) assert.Equal(t, tt.want.MaxTokens, got.MaxTokens) }) @@ -923,6 +944,31 @@ func TestAgentConfig_BuildOptions(t *testing.T) { }`, wantLen: 3, // model, temperature, reasoning (max_tokens is set) }, + { + name: "with adaptive reasoning mode", + format: "json", + config: `{ + "model": "test-model", + 
"temperature": 0.7, + "reasoning": { + "mode": "adaptive", + "effort": "xhigh" + } + }`, + wantLen: 3, // model, temperature, metadata for adaptive reasoning + checkOptions: func(t *testing.T, options []llms.CallOption) { + var opts llms.CallOptions + for _, option := range options { + option(&opts) + } + + assert.Equal(t, map[string]interface{}{ + "reasoning_effort": "xhigh", + "reasoning_mode": "adaptive", + }, opts.Metadata) + assert.Nil(t, opts.Reasoning) + }, + }, { name: "with invalid reasoning tokens over limit", format: "json", diff --git a/frontend/graphql-schema.graphql b/frontend/graphql-schema.graphql index fae09675..8c18950f 100644 --- a/frontend/graphql-schema.graphql +++ b/frontend/graphql-schema.graphql @@ -292,6 +292,7 @@ fragment agentConfigFragment on AgentConfig { frequencyPenalty presencePenalty reasoning { + mode effort maxTokens } diff --git a/frontend/src/graphql/types.ts b/frontend/src/graphql/types.ts index 964c4942..45ba0baa 100644 --- a/frontend/src/graphql/types.ts +++ b/frontend/src/graphql/types.ts @@ -803,17 +803,26 @@ export type QueryVectorStoreLogsArgs = { export type ReasoningConfig = { effort?: Maybe; maxTokens?: Maybe; + mode?: Maybe; }; export type ReasoningConfigInput = { effort?: InputMaybe; maxTokens?: InputMaybe; + mode?: InputMaybe; }; export enum ReasoningEffort { High = 'high', Low = 'low', + Max = 'max', Medium = 'medium', + Xhigh = 'xhigh', +} + +export enum ReasoningMode { + Adaptive = 'adaptive', + Budget = 'budget', } export enum ResultFormat { @@ -1330,7 +1339,7 @@ export type AgentConfigFragmentFragment = { repetitionPenalty?: number | null; frequencyPenalty?: number | null; presencePenalty?: number | null; - reasoning?: { effort?: ReasoningEffort | null; maxTokens?: number | null } | null; + reasoning?: { mode?: ReasoningMode | null; effort?: ReasoningEffort | null; maxTokens?: number | null } | null; price?: { input: number; output: number; cacheRead: number; cacheWrite: number } | null; }; @@ -2301,6 +2310,7 
@@ export const AgentConfigFragmentFragmentDoc = gql` frequencyPenalty presencePenalty reasoning { + mode effort maxTokens } diff --git a/frontend/src/pages/settings/settings-provider.tsx b/frontend/src/pages/settings/settings-provider.tsx index b8f44799..2333943d 100644 --- a/frontend/src/pages/settings/settings-provider.tsx +++ b/frontend/src/pages/settings/settings-provider.tsx @@ -14,7 +14,7 @@ import { XCircle, } from 'lucide-react'; import { useEffect, useMemo, useRef, useState } from 'react'; -import { useController, useForm, useFormState, useWatch } from 'react-hook-form'; +import { type Control, useController, useForm, type UseFormSetValue, useFormState, useWatch } from 'react-hook-form'; import { useNavigate, useParams, useSearchParams } from 'react-router-dom'; import { z } from 'zod'; @@ -39,6 +39,7 @@ import { StatusCard } from '@/components/ui/status-card'; import { AgentConfigType, ReasoningEffort, + ReasoningMode, useCreateProviderMutation, useDeleteProviderMutation, useSettingsProvidersQuery, @@ -328,7 +329,9 @@ const FormModelComboboxItem: React.FC = ({ const displayValue = field.value ?? ''; // Format price for display - const formatPrice = (price?: null | { cacheRead: number; cacheWrite: number; input: number; output: number }): string => { + const formatPrice = ( + price?: null | { cacheRead: number; cacheWrite: number; input: number; output: number }, + ): string => { if (!price || ((!price.input || price.input === 0) && (!price.output || price.output === 0))) { return 'free'; } @@ -338,7 +341,7 @@ const FormModelComboboxItem: React.FC = ({ }; const basePrice = `$${formatValue(price.input)}/$${formatValue(price.output)}`; - + // Add cache prices if available const hasCachePrices = (price.cacheRead && price.cacheRead > 0) || (price.cacheWrite && price.cacheWrite > 0); @@ -519,6 +522,10 @@ const agentConfigSchema = z (value) => (value === '' || value === undefined ? 
null : value), z.number().nullable().optional(), ), + mode: z.preprocess( + (value) => (value === '' || value === undefined ? null : value), + z.string().nullable().optional(), + ), }) .nullable() .optional(), @@ -559,6 +566,15 @@ type FormData = z.infer; // Convert camelCase key to display name (e.g., 'simpleJson' -> 'Simple Json') const getName = (key: string): string => key.replaceAll(/([A-Z])/g, ' $1').replace(/^./, (item) => item.toUpperCase()); +const isBedrockAdaptiveThinkingModel = (model: string | undefined): boolean => + !!model && /anthropic\.claude-(opus|sonnet)-4-[67]/.test(model); + +const getBedrockAdaptiveDefaultEffort = (model: string | undefined): ReasoningEffort => + model?.includes('claude-opus-4-7') ? ReasoningEffort.Xhigh : ReasoningEffort.High; + +const isAdaptiveOnlyReasoningEffort = (effort: null | string | undefined): boolean => + effort === ReasoningEffort.Xhigh || effort === ReasoningEffort.Max; + // Helper function to convert string to ReasoningEffort enum const getReasoningEffort = (effort: null | string | undefined): null | ReasoningEffort => { if (!effort) { @@ -574,10 +590,39 @@ const getReasoningEffort = (effort: null | string | undefined): null | Reasoning return ReasoningEffort.Low; } + case 'max': { + return ReasoningEffort.Max; + } + case 'medium': { return ReasoningEffort.Medium; } + case 'xhigh': { + return ReasoningEffort.Xhigh; + } + + default: { + return null; + } + } +}; + +// Helper function to convert string to ReasoningMode enum +const getReasoningMode = (mode: null | string | undefined): null | ReasoningMode => { + if (!mode) { + return null; + } + + switch (mode.toLowerCase()) { + case 'adaptive': { + return ReasoningMode.Adaptive; + } + + case 'budget': { + return ReasoningMode.Budget; + } + default: { return null; } @@ -619,6 +664,7 @@ const transformFormToGraphQL = ( ? { effort: getReasoningEffort(data?.reasoning.effort), maxTokens: data?.reasoning.maxTokens ?? 
null, + mode: getReasoningMode(data?.reasoning.mode), } : null, repetitionPenalty: data?.repetitionPenalty ?? null, @@ -637,6 +683,120 @@ const transformFormToGraphQL = ( }; }; +interface ReasoningConfigurationFieldsProps { + agentKey: string; + control: Control; + disabled?: boolean; + setValue: UseFormSetValue; +} + +const ReasoningConfigurationFields: React.FC = ({ + agentKey, + control, + disabled, + setValue, +}) => { + const reasoningMode = useWatch({ control, name: `agents.${agentKey}.reasoning.mode` as const }); + const reasoningEffort = useWatch({ control, name: `agents.${agentKey}.reasoning.effort` as const }); + const isAdaptiveReasoning = reasoningMode === ReasoningMode.Adaptive; + + return ( +
+
+

Reasoning Configuration

+
+ {/* Reasoning Mode field */} + ( + + Reasoning Mode + + Use Adaptive for Claude Opus/Sonnet 4.6+ on Bedrock. + + + )} + /> + + {/* Reasoning Effort field */} + ( + + Reasoning Effort + + + + )} + /> + + {/* Reasoning Max Tokens field */} + {!isAdaptiveReasoning && ( + + )} +
+
+
+ ); +}; + // Helper function to recursively remove __typename from objects const normalizeGraphQLData = (obj: unknown): unknown => { if (obj === null || obj === undefined) { @@ -1526,6 +1686,21 @@ const SettingsProvider = () => { `agents.${agentKey}.price.cacheWrite` as const, price?.cacheWrite ?? null, ); + + if (isBedrockAdaptiveThinkingModel(option?.name)) { + setValue( + `agents.${agentKey}.reasoning.mode` as const, + ReasoningMode.Adaptive, + ); + setValue( + `agents.${agentKey}.reasoning.effort` as const, + getBedrockAdaptiveDefaultEffort(option?.name), + ); + setValue( + `agents.${agentKey}.reasoning.maxTokens` as const, + null, + ); + } }} options={availableModels} placeholder="Select or enter model name" @@ -1637,64 +1812,12 @@ const SettingsProvider = () => { {/* Reasoning Configuration */} -
-
-

Reasoning Configuration

-
- {/* Reasoning Effort field */} - ( - - Reasoning Effort - - - - )} - /> - - {/* Reasoning Max Tokens field */} - -
-
-
+ {/* Price Configuration */}
From b1c4adc39043efbc81c17719c64670587466a7ab Mon Sep 17 00:00:00 2001 From: kernelalex <188699471+kernelalex@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:58:14 -0400 Subject: [PATCH 2/2] Updated README.md Adding clear support for opus 4.7 in the README.md file --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ddc15ba..48525361 100644 --- a/README.md +++ b/README.md @@ -1751,7 +1751,7 @@ PROXY_URL=http://your-proxy:8080 #### Supported Models -PentAGI supports 21 AWS Bedrock models with tool calling, streaming, and multimodal capabilities. Models marked with `*` are used in default configuration. +PentAGI supports 22 AWS Bedrock models with tool calling, streaming, and multimodal capabilities. Models marked with `*` are used in default configuration. | Model ID | Provider | Thinking | Multimodal | Price (Input/Output) | Use Case | | ------------------------------------------------ | --------------- | -------- | ---------- | -------------------- | --------------------------------------- | @@ -1760,6 +1760,7 @@ PentAGI supports 21 AWS Bedrock models with tool calling, streaming, and multimo | `us.amazon.nova-pro-v1:0` | Amazon Nova | ❌ | ✅ | $0.80/$3.20 | Balanced accuracy, speed, cost | | `us.amazon.nova-lite-v1:0` | Amazon Nova | ❌ | ✅ | $0.06/$0.24 | Fast processing, high-volume operations | | `us.amazon.nova-micro-v1:0` | Amazon Nova | ❌ | ❌ | $0.035/$0.14 | Ultra-low latency, real-time monitoring | +| `us.anthropic.claude-opus-4-7` | Anthropic | ✅ | ✅ | $5.00/$25.00 | Latest Opus with adaptive thinking | | `us.anthropic.claude-opus-4-6-v1`* | Anthropic | ✅ | ✅ | $5.00/$25.00 | World-class coding, enterprise agents | | `us.anthropic.claude-sonnet-4-6` | Anthropic | ✅ | ✅ | $3.00/$15.00 | Frontier intelligence, enterprise scale | | `us.anthropic.claude-opus-4-5-20251101-v1:0` | Anthropic | ✅ | ✅ | $5.00/$25.00 | Multi-day software development | @@ -1780,6 +1781,8 @@ PentAGI supports 21 AWS 
Bedrock models with tool calling, streaming, and multimo **Prices**: Per 1M tokens. Models with thinking/reasoning support additional compute costs during reasoning phase. +**Reasoning Modes**: Adaptive mode is required for Claude Opus 4.7 on Bedrock and is supported by Claude Opus/Sonnet 4.6 and later. In provider settings, select `Reasoning Mode: Adaptive`; the chosen `Reasoning Effort` is then sent to Bedrock as `output_config.effort` instead of a fixed thinking token budget. + #### Tested but Incompatible Models Some AWS Bedrock models were tested but are **not supported** due to technical limitations: