diff --git a/genkit-tools/common/src/types/model.ts b/genkit-tools/common/src/types/model.ts index a36d9f288f..a08fe94ff7 100644 --- a/genkit-tools/common/src/types/model.ts +++ b/genkit-tools/common/src/types/model.ts @@ -20,7 +20,6 @@ import { DataPartSchema, MediaPartSchema, MultipartToolResponseSchema, - PartSchema, ReasoningPartSchema, ResourcePartSchema, TextPartSchema, @@ -30,7 +29,6 @@ import { type DataPart, type MediaPart, type MultipartToolResponse, - type Part, type ReasoningPart, type ResourcePart, type TextPart, @@ -42,7 +40,6 @@ export { DataPartSchema, MediaPartSchema, MultipartToolResponseSchema, - PartSchema, ReasoningPartSchema, ResourcePartSchema, TextPartSchema, @@ -52,7 +49,6 @@ export { type DataPart, type MediaPart, type MultipartToolResponse, - type Part, type ReasoningPart, type ResourcePart, type TextPart, @@ -61,11 +57,53 @@ export { }; // -// IMPORTANT: Keep this file in sync with genkit/ai/src/model.ts! +// IMPORTANT: Keep this file in sync with genkit/ai/src/model-types.ts! // +/** Descriptor for a registered middleware, returned by reflection API. */ +export const MiddlewareDescSchema = z.object({ + /** Unique name of the middleware. */ + name: z.string(), + /** Human-readable description of what the middleware does. */ + description: z.string().optional(), + /** JSON Schema for the middleware's configuration. */ + configSchema: z.record(z.any()).nullish(), + /** User defined metadata for the middleware. */ + metadata: z.record(z.any()).nullish(), +}); +export type MiddlewareDesc = z.infer; + +/** + * Zod schema of middleware reference. + */ +export const MiddlewareRefSchema = z.object({ + name: z.string(), + config: z.any().optional(), +}); + +/** + * Middleware reference. + */ +export type MiddlewareRef = z.infer; + +/** + * Zod schema of an operation representing a model reference. 
+ */ +export const ModelReferenceSchema = z.object({ + name: z.string(), + configSchema: z.any().optional(), + info: z.any().optional(), + version: z.string().optional(), + config: z.any().optional(), +}); + +/** + * Model Reference + */ +export type ModelReference = z.infer; + /** - * Zod schema of an opration representing a background task. + * Zod schema of an operation representing a background task. */ export const OperationSchema = z.object({ action: z.string().optional(), @@ -81,6 +119,25 @@ export const OperationSchema = z.object({ */ export type OperationData = z.infer; +/** + * Zod schema of message part. + */ +export const PartSchema = z.union([ + TextPartSchema, + MediaPartSchema, + ToolRequestPartSchema, + ToolResponsePartSchema, + DataPartSchema, + CustomPartSchema, + ReasoningPartSchema, + ResourcePartSchema, +]); + +/** + * Message part. + */ +export type Part = z.infer; + /** * Zod schema of a message role. */ @@ -136,7 +193,7 @@ export const ModelInfoSchema = z.object({ constrained: z.enum(['none', 'all', 'no-tools']).optional(), /** Model supports controlling tool choice, e.g. forced tool calling. */ toolChoice: z.boolean().optional(), - /** Model supports long running operations. */ + /** Model can perform long-running operations. */ longRunning: z.boolean().optional(), }) .optional(), @@ -182,18 +239,64 @@ export const ToolDefinitionSchema = z.object({ */ export type ToolDefinition = z.infer; +/** + * Configuration parameter descriptions. + */ +export const GenerationCommonConfigDescriptions = { + temperature: + 'Controls the degree of randomness in token selection. A lower value is ' + + 'good for a more predictable response. A higher value leads to more ' + + 'diverse or unexpected results.', + maxOutputTokens: 'The maximum number of tokens to include in the response.', + topK: 'The maximum number of tokens to consider when sampling.', + topP: + 'Decides how many possible words to consider. 
A higher value means ' + + 'that the model looks at more possible words, even the less likely ' + + 'ones, which makes the generated text more diverse.', +}; + /** * Zod schema of a common config object. */ -export const GenerationCommonConfigSchema = z.object({ - /** A specific version of a model family, e.g. `gemini-1.0-pro-001` for the `gemini-1.0-pro` family. */ - version: z.string().optional(), - temperature: z.number().optional(), - maxOutputTokens: z.number().optional(), - topK: z.number().optional(), - topP: z.number().optional(), - stopSequences: z.array(z.string()).optional(), -}); +export const GenerationCommonConfigSchema = z + .object({ + version: z + .string() + .describe( + 'A specific version of a model family, e.g. `gemini-2.5-flash` ' + + 'for the `googleai` family.' + ) + .optional(), + temperature: z + .number() + .describe(GenerationCommonConfigDescriptions.temperature) + .optional(), + maxOutputTokens: z + .number() + .describe(GenerationCommonConfigDescriptions.maxOutputTokens) + .optional(), + topK: z + .number() + .describe(GenerationCommonConfigDescriptions.topK) + .optional(), + topP: z + .number() + .describe(GenerationCommonConfigDescriptions.topP) + .optional(), + stopSequences: z + .array(z.string()) + .describe( + 'Set of character sequences (up to 5) that will stop output generation.' + ) + .optional(), + apiKey: z + .string() + .describe( + 'API Key to use for the model call, overrides API key provided in plugin config.' + ) + .optional(), + }) + .passthrough(); /** * Common config object. @@ -379,6 +482,8 @@ export const GenerateActionOptionsSchema = z.object({ messages: z.array(MessageSchema), /** List of registered tool names for this generation if supported by the underlying model. */ tools: z.array(z.string()).optional(), + /** List of registered resource names for this generation if supported by the underlying model. */ + resources: z.array(z.string()).optional(), /** Tool calling mode. 
`auto` lets the model decide whether to use tools, `required` forces the model to choose a tool, and `none` forces the model not to use any tools. Defaults to `auto`. */ toolChoice: z.enum(['auto', 'required', 'none']).optional(), /** Configuration for the generation request. */ @@ -399,5 +504,7 @@ export const GenerateActionOptionsSchema = z.object({ maxTurns: z.number().optional(), /** Custom step name for this generate call to display in trace views. Defaults to "generate". */ stepName: z.string().optional(), + /** Middleware to apply to this generation. */ + use: z.array(MiddlewareRefSchema).optional(), }); export type GenerateActionOptions = z.infer; diff --git a/genkit-tools/common/src/types/parts.ts b/genkit-tools/common/src/types/parts.ts index eef420cc7b..1c23a905f0 100644 --- a/genkit-tools/common/src/types/parts.ts +++ b/genkit-tools/common/src/types/parts.ts @@ -32,7 +32,7 @@ const EmptyPartSchema = z.object({ * Zod schema for a text part. */ export const TextPartSchema = EmptyPartSchema.extend({ - /** The text of the document. */ + /** The text of the message. */ text: z.string(), }); @@ -89,6 +89,7 @@ export const ToolRequestSchema = z.object({ /** Whether the request is a partial chunk. */ partial: z.boolean().optional(), }); +export type ToolRequest = z.infer; /** * Zod schema of a tool request part. @@ -103,6 +104,9 @@ export const ToolRequestPartSchema = EmptyPartSchema.extend({ */ export type ToolRequestPart = z.infer; +/** + * Zod schema of a tool response. + */ const ToolResponseSchemaBase = z.object({ /** The call id or reference for a specific request. */ ref: z.string().optional(), @@ -116,14 +120,14 @@ const ToolResponseSchemaBase = z.object({ * Tool response part. 
*/ export type ToolResponse = z.infer & { - content?: Part[]; + content?: TextOrMediaPart[]; }; export const ToolResponseSchema: z.ZodType = ToolResponseSchemaBase.extend({ content: z.array(z.any()).optional(), // TODO: switch to this once we have effective recursive schema support across the board. - // content: z.array(z.lazy(() => PartSchema)).optional(), + // content: z.array(z.lazy(() => DocumentPartSchema)).optional(), }); /** @@ -134,6 +138,9 @@ export const ToolResponsePartSchema = EmptyPartSchema.extend({ toolResponse: ToolResponseSchema, }); +/** + * Tool response part. + */ export type ToolResponsePart = z.infer; /** @@ -174,28 +181,17 @@ export const ResourcePartSchema = EmptyPartSchema.extend({ */ export type ResourcePart = z.infer; -/** - * Zod schema of message part. - */ -export const PartSchema = z.union([ - TextPartSchema, - MediaPartSchema, - ToolRequestPartSchema, - ToolResponsePartSchema, - DataPartSchema, - CustomPartSchema, - ReasoningPartSchema, - ResourcePartSchema, -]); - -/** - * Message part. - */ -export type Part = z.infer; +// Disclaimer: genkit/js/ai/parts.ts defines the following schema, type pair +// as PartSchema and Part, respectively. genkit-tools cannot retain those names +// due to it clashing with similar schema in model.ts, and genkit-tools +// exporting all types at root. We use a different name here and updated +// the corresponding imports. 
+export const TextOrMediaPartSchema = z.union([TextPartSchema, MediaPartSchema]); +export type TextOrMediaPart = z.infer; export const MultipartToolResponseSchema = z.object({ output: z.unknown().optional(), - content: z.array(PartSchema).optional(), + content: z.array(TextOrMediaPartSchema).optional(), }); export type MultipartToolResponse = z.infer; diff --git a/genkit-tools/genkit-schema.json b/genkit-tools/genkit-schema.json index 26cc4fbf4f..e2d173dd91 100644 --- a/genkit-tools/genkit-schema.json +++ b/genkit-tools/genkit-schema.json @@ -424,6 +424,12 @@ "type": "string" } }, + "resources": { + "type": "array", + "items": { + "type": "string" + } + }, "toolChoice": { "type": "string", "enum": [ @@ -466,6 +472,12 @@ }, "stepName": { "type": "string" + }, + "use": { + "type": "array", + "items": { + "$ref": "#/$defs/MiddlewareRef" + } } }, "required": [ @@ -601,28 +613,38 @@ "type": "object", "properties": { "version": { - "type": "string" + "type": "string", + "description": "A specific version of a model family, e.g. `gemini-2.5-flash` for the `googleai` family." }, "temperature": { - "type": "number" + "type": "number", + "description": "Controls the degree of randomness in token selection. A lower value is good for a more predictable response. A higher value leads to more diverse or unexpected results." }, "maxOutputTokens": { - "type": "number" + "type": "number", + "description": "The maximum number of tokens to include in the response." }, "topK": { - "type": "number" + "type": "number", + "description": "The maximum number of tokens to consider when sampling." }, "topP": { - "type": "number" + "type": "number", + "description": "Decides how many possible words to consider. A higher value means that the model looks at more possible words, even the less likely ones, which makes the generated text more diverse." 
}, "stopSequences": { "type": "array", "items": { "type": "string" - } + }, + "description": "Set of character sequences (up to 5) that will stop output generation." + }, + "apiKey": { + "type": "string", + "description": "API Key to use for the model call, overrides API key provided in plugin config." } }, - "additionalProperties": false + "additionalProperties": true }, "GenerationUsage": { "type": "object", @@ -734,6 +756,56 @@ ], "additionalProperties": false }, + "MiddlewareDesc": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "configSchema": { + "anyOf": [ + { + "type": "object", + "additionalProperties": {} + }, + { + "type": "null" + } + ] + }, + "metadata": { + "anyOf": [ + { + "type": "object", + "additionalProperties": {} + }, + { + "type": "null" + } + ] + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + "MiddlewareRef": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "config": {} + }, + "required": [ + "name" + ], + "additionalProperties": false + }, "ModelInfo": { "type": "object", "properties": { @@ -810,6 +882,24 @@ }, "additionalProperties": false }, + "ModelReference": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "configSchema": {}, + "info": {}, + "version": { + "type": "string" + }, + "config": {} + }, + "required": [ + "name" + ], + "additionalProperties": false + }, "ModelRequest": { "type": "object", "properties": { @@ -904,7 +994,7 @@ "content": { "type": "array", "items": { - "$ref": "#/$defs/Part" + "$ref": "#/$defs/TextOrMediaPart" } } }, @@ -1252,6 +1342,16 @@ ], "additionalProperties": false }, + "TextOrMediaPart": { + "anyOf": [ + { + "$ref": "#/$defs/TextPart" + }, + { + "$ref": "#/$defs/MediaPart" + } + ] + }, "ToolRequest": { "type": "object", "properties": { diff --git a/go/ai/gen.go b/go/ai/gen.go index e391ef2215..c52e273f74 100644 --- a/go/ai/gen.go +++ b/go/ai/gen.go 
@@ -82,7 +82,8 @@ type GenerateActionOptions struct { // Model is a model name (e.g., "vertexai/gemini-1.0-pro"). Model string `json:"model,omitempty"` // Output specifies the desired output format. Defaults to the model's default if unspecified. - Output *GenerateActionOutputConfig `json:"output,omitempty"` + Output *GenerateActionOutputConfig `json:"output,omitempty"` + Resources []string `json:"resources,omitempty"` // Resume provides options for resuming an interrupted generation. Resume *GenerateActionResume `json:"resume,omitempty"` // ReturnToolRequests, when true, returns tool calls for manual processing instead of @@ -95,7 +96,8 @@ type GenerateActionOptions struct { // the model to choose a tool, and none forces the model not to use any tools. Defaults to auto. ToolChoice ToolChoice `json:"toolChoice,omitempty"` // Tools is a list of registered tool names for this generation if supported. - Tools []string `json:"tools,omitempty"` + Tools []string `json:"tools,omitempty"` + Use []*MiddlewareRef `json:"use,omitempty"` } // GenerateActionResume holds options for resuming an interrupted generation. @@ -133,6 +135,8 @@ type GenerateActionOutputConfig struct { // GenerationCommonConfig holds configuration parameters for model generation requests. type GenerationCommonConfig struct { + // API Key to use for the model call, overrides API key provided in plugin config. + ApiKey string `json:"apiKey,omitempty"` // MaxOutputTokens limits the maximum number of tokens generated in the response. MaxOutputTokens int `json:"maxOutputTokens,omitempty"` // StopSequences specifies sequences that will cause generation to stop when encountered. 
@@ -206,6 +210,18 @@ type Message struct { Role Role `json:"role,omitempty"` } +type MiddlewareDesc struct { + ConfigSchema any `json:"configSchema,omitempty"` + Description string `json:"description,omitempty"` + Metadata any `json:"metadata,omitempty"` + Name string `json:"name,omitempty"` +} + +type MiddlewareRef struct { + Config any `json:"config,omitempty"` + Name string `json:"name,omitempty"` +} + // ModelInfo contains metadata about a model's capabilities and characteristics. type ModelInfo struct { // ConfigSchema defines the model-specific configuration schema. @@ -267,6 +283,14 @@ const ( ConstrainedSupportNoTools ConstrainedSupport = "no-tools" ) +type ModelReference struct { + Config any `json:"config,omitempty"` + ConfigSchema any `json:"configSchema,omitempty"` + Info any `json:"info,omitempty"` + Name string `json:"name,omitempty"` + Version string `json:"version,omitempty"` +} + // A ModelRequest is a request to generate completions from a model. type ModelRequest struct { // Config holds model-specific configuration parameters. 
diff --git a/go/core/schemas.config b/go/core/schemas.config index 70798f2eb3..1beb6f139e 100644 --- a/go/core/schemas.config +++ b/go/core/schemas.config @@ -894,6 +894,7 @@ TraceEvent omit GenerationCommonConfig.maxOutputTokens type int GenerationCommonConfig.topK type int +GenerationCommonConfig.apiKey name APIKey # Unused evaluation types BaseDataPoint omit @@ -1064,8 +1065,6 @@ GenerateActionOptionsResume name GenerateActionResume # GenerateActionOutputConfig GenerateActionOutputConfig.instructions type *string -GenerateActionOutputConfig.format name OutputConfigFormat -GenerateActionOutputConfig.jsonSchema name Schema GenerateActionOutputConfig.jsonSchema type map[string]any GenerateActionOutputConfig.constrained type bool @@ -1092,6 +1091,10 @@ ModelResponse.raw type any ModelResponse.operation type *Operation ModelResponse field formatHandler StreamingFormatHandler +# MultipartToolResponse +MultipartToolResponse pkg ai +MultipartToolResponse.content type []*Part + # ModelResponseChunk ModelResponseChunk pkg ai ModelResponseChunk.aggregated type bool diff --git a/go/internal/cmd/jsonschemagen/jsonschema.go b/go/internal/cmd/jsonschemagen/jsonschema.go index 0afbde5e74..5e3b944230 100644 --- a/go/internal/cmd/jsonschemagen/jsonschema.go +++ b/go/internal/cmd/jsonschemagen/jsonschema.go @@ -36,6 +36,7 @@ type Schema struct { Const any `json:"const,omitempty"` Required []string `json:"required,omitempty"` Items *Schema `json:"items,omitempty"` + MaxItems int `json:"maxItems,omitempty"` Enum []string `json:"enum,omitempty"` Not any `json:"not,omitempty"` AnyOf []*Schema `json:"anyOf,omitempty"` diff --git a/py/packages/genkit/src/genkit/core/typing.py b/py/packages/genkit/src/genkit/core/typing.py index 02f5927450..c48704d8ec 100644 --- a/py/packages/genkit/src/genkit/core/typing.py +++ b/py/packages/genkit/src/genkit/core/typing.py @@ -187,13 +187,29 @@ class GenerateActionOutputConfig(BaseModel): class GenerationCommonConfig(BaseModel): """Model for 
generationcommonconfig data.""" - model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='forbid', populate_by_name=True) - version: str | None = None - temperature: float | None = None - max_output_tokens: float | None = Field(default=None) - top_k: float | None = Field(default=None) - top_p: float | None = Field(default=None) - stop_sequences: list[str] | None = Field(default=None) + model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='allow', populate_by_name=True) + version: str | None = Field( + default=None, + description='A specific version of a model family, e.g. `gemini-2.5-flash` for the `googleai` family.', + ) + temperature: float | None = Field( + default=None, + description='Controls the degree of randomness in token selection. A lower value is good for a more predictable response. A higher value leads to more diverse or unexpected results.', + ) + max_output_tokens: float | None = Field( + default=None, description='The maximum number of tokens to include in the response.' + ) + top_k: float | None = Field(default=None, description='The maximum number of tokens to consider when sampling.') + top_p: float | None = Field( + default=None, + description='Decides how many possible words to consider. A higher value means that the model looks at more possible words, even the less likely ones, which makes the generated text more diverse.', + ) + stop_sequences: list[str] | None = Field( + default=None, description='Set of character sequences (up to 5) that will stop output generation.' + ) + api_key: str | None = Field( + default=None, description='API Key to use for the model call, overrides API key provided in plugin config.' 
+ ) class GenerationUsage(BaseModel): @@ -216,6 +232,24 @@ class GenerationUsage(BaseModel): cached_content_tokens: float | None = Field(default=None) +class MiddlewareDesc(BaseModel): + """Model for middlewaredesc data.""" + + model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='forbid', populate_by_name=True) + name: str + description: str | None = None + config_schema: dict[str, Any] | None = Field(default=None) + metadata: dict[str, Any] | None = None + + +class MiddlewareRef(BaseModel): + """Model for middlewareref data.""" + + model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='forbid', populate_by_name=True) + name: str + config: Any | None = None + + class Constrained(StrEnum): """Constrained data type class.""" @@ -261,6 +295,17 @@ class ModelInfo(BaseModel): stage: Stage | None = None +class ModelReference(BaseModel): + """Model for modelreference data.""" + + model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='forbid', populate_by_name=True) + name: str + config_schema: Any | None = Field(default=None) + info: Any | None = None + version: str | None = None + config: Any | None = None + + class Error(BaseModel): """Model for error data.""" @@ -756,6 +801,12 @@ class ToolResponsePart(BaseModel): resource: Resource | None = None +class TextOrMediaPart(RootModel[TextPart | MediaPart]): + """Root model for textormediapart.""" + + root: TextPart | MediaPart + + class Link(BaseModel): """Model for link data.""" @@ -850,6 +901,14 @@ class Resume(BaseModel): metadata: dict[str, Any] | None = None +class MultipartToolResponse(BaseModel): + """Model for multiparttoolresponse data.""" + + model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='forbid', populate_by_name=True) + output: Any | None = None + content: list[TextOrMediaPart] | None = None + + class Part( RootModel[ TextPart | MediaPart | ToolRequestPart | ToolResponsePart | DataPart | CustomPart | 
ReasoningPart | ResourcePart @@ -930,14 +989,6 @@ class ModelResponseChunk(BaseModel): aggregated: Aggregated | None = None -class MultipartToolResponse(BaseModel): - """Model for multiparttoolresponse data.""" - - model_config: ClassVar[ConfigDict] = ConfigDict(alias_generator=to_camel, extra='forbid', populate_by_name=True) - output: Any | None = None - content: list[Part] | None = None - - class RerankerRequest(BaseModel): """Model for rerankerrequest data.""" @@ -1002,6 +1053,7 @@ class GenerateActionOptions(BaseModel): return_tool_requests: bool | None = Field(default=None) max_turns: float | None = Field(default=None) step_name: str | None = Field(default=None) + use: list[MiddlewareRef] | None = None class GenerateRequest(BaseModel):