From 249eacf0afbf5f38b6c532101ad81f586f4f40ee Mon Sep 17 00:00:00 2001 From: Shantanu Tripathi Date: Tue, 5 May 2026 20:29:46 +0000 Subject: [PATCH] feat(inference-operator): Update inference helm chart to v2.1.1 with latest CRDs Update inference operator chart from AWSCrescendoInferenceOperator dist. Includes new CRD schemas, init container support, custom service accounts flag, and templated manager configuration. Excludes pdSpec (disaggregated prefill/decode) as it is not yet GA. --- .../charts/inference-operator/Chart.yaml | 2 +- ...s.amazon.com_inferenceendpointconfigs.yaml | 335 +++++++++++++++++- ...emaker.aws.amazon.com_jumpstartmodels.yaml | 285 ++++++++++++++- ...on.com_sagemakerendpointregistrations.yaml | 103 ++++++ .../config/manager/manager.yaml | 14 +- .../charts/inference-operator/values.yaml | 4 + 6 files changed, 727 insertions(+), 16 deletions(-) diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml index d753d900..f4a92b05 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 2.1.0 +version: 2.1.1 # This is the version number of the application being deployed. Keep this aligned # with operator image MAJOR.MINOR version. 
diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml index a99280a3..c5c6bd38 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml @@ -270,7 +270,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -315,7 +315,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -343,6 +343,211 @@ spec: minimum: 0 type: integer type: object + dataCapture: + description: Configuration for data capture across multiple tiers + (SageMaker, LoadBalancer, Model Pod) + properties: + loadBalancer: + description: Configuration for LoadBalancer level data capture + (Tier 2) + properties: + enabled: + description: Enable or disable load balancer access logs + type: boolean + required: + - enabled + type: object + modelPod: + description: Configuration for Model Pod level data capture (Tier + 3) + properties: + bufferConfig: + 
description: Configuration for buffering and flushing captured + data + properties: + batchSize: + default: 10 + description: Number of records to batch before writing + to S3 + format: int32 + maximum: 1000 + minimum: 1 + type: integer + flushIntervalSeconds: + default: 60 + description: Flush interval in seconds + format: int32 + maximum: 300 + minimum: 10 + type: integer + type: object + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable model pod data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + payloadConfig: + description: Configuration for payload size limits + properties: + maxPayloadSizeKB: + default: 0 + description: Maximum payload size in KB to capture. 0 + means no limit (capture full payload). + format: int32 + maximum: 10240 + minimum: 0 + type: integer + type: object + required: + - enabled + type: object + s3Uri: + description: |- + Common S3 URI for all data capture tiers. Each tier will write to a specific prefix within this bucket. + Must use s3:// protocol (required by ALB access logs). + If not provided, the TLS certificate bucket will be used for data capture storage. + maxLength: 512 + pattern: ^s3://([^/]+)(/[^,=]*)?$ + type: string + sagemakerEndpoint: + description: Configuration for SageMaker Endpoint level data capture + (Tier 1) + properties: + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. 
+ items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable SageMaker endpoint data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + required: + - enabled + type: object + type: object + dnsConfig: + description: DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig + to be set. + properties: + hostedZoneId: + description: Route53 Hosted Zone ID where the DNS record will + be created. 
+ pattern: ^Z[A-Z0-9]+$ + type: string + required: + - hostedZoneId + type: object endpointName: description: |- Name used for Sagemaker Endpoint @@ -589,7 +794,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -636,7 +841,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -2230,6 +2435,14 @@ spec: maxLength: 253 pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string + serviceAccountName: + description: |- + Name of the Kubernetes ServiceAccount to use for the inference pod. + If not specified, the namespace's default service account will be used. + This is useful for providing AWS credentials via IRSA to init containers or the worker. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string volumes: description: |- Additional volumes to add to the pod spec. @@ -4249,6 +4462,50 @@ spec: required: - fileSystemId type: object + huggingFaceModel: + description: HuggingFace model configuration. Required when modelSourceType + is "huggingface". + properties: + commitSHA: + description: |- + Git commit SHA for the model revision. Must be a full 40-character lowercase hex SHA. + If not provided, the operator defaults to "main" branch. 
+ pattern: ^[0-9a-f]{40}$ + type: string + modelId: + description: HuggingFace Hub model identifier in org/model + format (e.g. "meta-llama/Llama-3.1-8B-Instruct"). + pattern: ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ + type: string + tokenSecretRef: + description: |- + Reference to a Kubernetes Secret containing the HuggingFace API token. + The token is injected as the HF_TOKEN environment variable into the InitContainer only. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - modelId + type: object modelLocation: description: Specific location where the model data exists type: string @@ -4256,6 +4513,8 @@ spec: enum: - fsx - s3 + - huggingface + - kubernetesVolume type: string prefetchEnabled: default: false @@ -5378,6 +5637,34 @@ spec: - type type: object type: array + dataCaptureStatus: + description: Health status of the data capture pipeline + properties: + modelPod: + description: Health status of the model pod data capture tier + properties: + lastTransitionTime: + description: Time of the last health state transition + format: date-time + type: string + message: + description: Human-readable message describing the health + state + type: string + reason: + description: Reason for unhealthy status (e.g., OOMKilled, + S3UploadFailure, MultipleContainerRestarts) + type: string + status: + description: Current health status + enum: + - Healthy + - Unhealthy + type: string + 
required: + - status + type: object + type: object deploymentStatus: description: Details of the native kubernetes deployment that hosts the model @@ -5492,6 +5779,44 @@ spec: - lastUpdated - name type: object + dnsStatus: + description: Status of the operator-managed Route53 DNS record + properties: + dnsHealth: + description: 'DNS resolution status: Active, Pending, or Error.' + enum: + - Active + - Pending + - Error + type: string + hostedZoneId: + description: Route53 hosted zone ID. + type: string + lastTransitionTime: + description: When the status last transitioned, used for propagation + timeout. + format: date-time + type: string + managedByOperator: + description: Whether the operator manages this DNS record. + type: boolean + message: + description: Human-readable status or error message. + type: string + previousHostedZoneId: + description: Previous hosted zone ID, retained during domain/zone + changes until cleanup completes. + type: string + previousRecordName: + description: Previous record name, retained during domain/zone + changes until cleanup completes. + type: string + recordName: + description: Route53 record name. 
+ type: string + required: + - managedByOperator + type: object endpoints: description: EndpointStatus contains the status of SageMaker endpoints properties: @@ -5801,7 +6126,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml index f86ce2d6..a5e37ffd 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml @@ -262,7 +262,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -307,7 +307,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ 
-335,6 +335,211 @@ spec: minimum: 0 type: integer type: object + dataCapture: + description: Configuration for data capture across multiple tiers + (SageMaker, LoadBalancer, Model Pod) + properties: + loadBalancer: + description: Configuration for LoadBalancer level data capture + (Tier 2) + properties: + enabled: + description: Enable or disable load balancer access logs + type: boolean + required: + - enabled + type: object + modelPod: + description: Configuration for Model Pod level data capture (Tier + 3) + properties: + bufferConfig: + description: Configuration for buffering and flushing captured + data + properties: + batchSize: + default: 10 + description: Number of records to batch before writing + to S3 + format: int32 + maximum: 1000 + minimum: 1 + type: integer + flushIntervalSeconds: + default: 60 + description: Flush interval in seconds + format: int32 + maximum: 300 + minimum: 10 + type: integer + type: object + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). 
+ properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable model pod data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + payloadConfig: + description: Configuration for payload size limits + properties: + maxPayloadSizeKB: + default: 0 + description: Maximum payload size in KB to capture. 0 + means no limit (capture full payload). + format: int32 + maximum: 10240 + minimum: 0 + type: integer + type: object + required: + - enabled + type: object + s3Uri: + description: |- + Common S3 URI for all data capture tiers. Each tier will write to a specific prefix within this bucket. + Must use s3:// protocol (required by ALB access logs). + If not provided, the TLS certificate bucket will be used for data capture storage. 
+ maxLength: 512 + pattern: ^s3://([^/]+)(/[^,=]*)?$ + type: string + sagemakerEndpoint: + description: Configuration for SageMaker Endpoint level data capture + (Tier 1) + properties: + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable SageMaker endpoint data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + required: + - enabled + type: object + type: object + dnsConfig: + description: DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig + to be set. + properties: + hostedZoneId: + description: Route53 Hosted Zone ID where the DNS record will + be created. + pattern: ^Z[A-Z0-9]+$ + type: string + required: + - hostedZoneId + type: object environmentVariables: description: Additional environment variables to be passed to the inference server. Limited to 100 key-value pairs. @@ -576,7 +781,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -623,7 +828,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -937,6 +1142,34 @@ spec: - type type: object type: array + dataCaptureStatus: + description: Health status of the data capture pipeline + properties: + modelPod: + description: Health status of the model pod data capture tier + properties: + lastTransitionTime: + description: Time of the last health state transition + format: date-time + type: string + message: + description: Human-readable 
message describing the health + state + type: string + reason: + description: Reason for unhealthy status (e.g., OOMKilled, + S3UploadFailure, MultipleContainerRestarts) + type: string + status: + description: Current health status + enum: + - Healthy + - Unhealthy + type: string + required: + - status + type: object + type: object deploymentStatus: description: Details of the native kubernetes deployment that hosts the model @@ -1031,7 +1264,7 @@ spec: Total number of terminating pods targeted by this deployment. Terminating pods have a non-null .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. - This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + This is a beta field and requires enabling DeploymentReplicaSetTerminatingReplicas feature (enabled by default). format: int32 type: integer unavailableReplicas: @@ -1051,6 +1284,44 @@ spec: - lastUpdated - name type: object + dnsStatus: + description: Status of the operator-managed Route53 DNS record + properties: + dnsHealth: + description: 'DNS resolution status: Active, Pending, or Error.' + enum: + - Active + - Pending + - Error + type: string + hostedZoneId: + description: Route53 hosted zone ID. + type: string + lastTransitionTime: + description: When the status last transitioned, used for propagation + timeout. + format: date-time + type: string + managedByOperator: + description: Whether the operator manages this DNS record. + type: boolean + message: + description: Human-readable status or error message. + type: string + previousHostedZoneId: + description: Previous hosted zone ID, retained during domain/zone + changes until cleanup completes. + type: string + previousRecordName: + description: Previous record name, retained during domain/zone + changes until cleanup completes. + type: string + recordName: + description: Route53 record name. 
+ type: string + required: + - managedByOperator + type: object endpoints: description: EndpointStatus contains the status of SageMaker endpoints properties: @@ -1352,7 +1623,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -1701,7 +1972,7 @@ spec: Total number of terminating pods targeted by this deployment. Terminating pods have a non-null .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. - This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + This is a beta field and requires enabling DeploymentReplicaSetTerminatingReplicas feature (enabled by default). 
format: int32 type: integer unavailableReplicas: diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml index 80f1c56a..aaab9e29 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml @@ -114,6 +114,95 @@ spec: description: REST API Gateway identifier that proxies requests to the HyperPod endpoint (via NLB/ALB) type: string + sageMakerEndpointDataCaptureConfig: + description: Configuration for SageMaker endpoint data capture + properties: + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options specifying what to capture (Input, + Output, or both) + items: + description: CaptureOption defines what data to capture (input, + output, or both). 
+ properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + destinationS3Uri: + description: S3 URI where captured data will be stored + maxLength: 512 + pattern: ^s3://([^/]+)(/.*)?$ + type: string + enabled: + description: Enable or disable SageMaker endpoint data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100) + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias ARN + for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + required: + - captureOptions + - destinationS3Uri + - enabled + - initialSamplingPercentage + type: object + tags: + description: User-defined tags to propagate to SageMaker resources + (Model, EndpointConfig, Endpoint) + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array tlsConfig: properties: tlsCertificateOutputS3Bucket: @@ -352,6 +441,20 @@ spec: description: REST API Gateway identifier that proxies requests to the HyperPod endpoint (via NLB/ALB) type: string + tags: + description: User-defined tags to propagate to SageMaker resources + (Model, EndpointConfig, Endpoint) + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array tlsConfig: properties: tlsCertificateOutputS3Bucket: diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml index 24075cef..407f067b 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml @@ 
-131,7 +131,7 @@ spec: env: - name: CHECKS value: "drivers crds" - image: "public.ecr.aws/bitnami/kubectl:1.30" + image: "{{ .Values.image.repository }}/{{ .Values.image.initContainer.repository }}:{{ .Values.image.initContainer.tag }}" imagePullPolicy: Always name: check-csi-drivers resources: { } @@ -190,8 +190,8 @@ spec: name: webhook-certs readOnly: true env: - - name: HYPERPOD_CLUSTER_ARN - value: {{ .Values.hyperpodClusterArn }} + - name: AWS_REGION + value: {{ .Values.region | quote }} - name: EXECUTION_ROLE_ARN value: {{ .Values.executionRoleArn }} - name: JUMPSTART_GATED_MODEL_DOWNLOAD_ARN @@ -204,10 +204,18 @@ spec: value: {{ .Values.tlsCertificateS3Bucket }} - name: ENABLE_WEBHOOKS value: "{{ .Values.enableWebhooks }}" + - name: ENABLE_CUSTOM_SERVICE_ACCOUNTS + value: "{{ .Values.enableCustomServiceAccounts }}" + - name: OPERATOR_SERVICE_ACCOUNT_NAME + value: {{ .Values.namePrefix }}-controller-manager + - name: OPERATOR_NAMESPACE + value: {{ .Values.shortPrefix }}-system - name: CHART_VERSION value: {{ .Chart.Version | quote }} - name: APP_VERSION value: {{ .Chart.AppVersion | quote }} + - name: HYPERPOD_CLUSTER_ARN + value: {{ .Values.hyperpodClusterArn | default "" | quote }} serviceAccountName: {{ .Values.namePrefix }}-controller-manager terminationGracePeriodSeconds: 10 volumes: diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml index 02793113..075a6df0 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml @@ -27,12 +27,16 @@ image: tag: v3.1 pullPolicy: Always repository: + initContainer: + repository: hyperpod-inference-operator-init-container + tag: v1.0 hyperpodClusterArn: executionRoleArn: jumpstartGatedModelDownloadRoleArn: "" stage: "prod" tlsCertificateS3Bucket: enableWebhooks: true +enableCustomServiceAccounts: false s3: enabled: true