@@ -32,7 +32,7 @@ metadata:
3232 capabilities : Seamless Upgrades
3333 categories : AI/Machine Learning,Monitoring
3434 containerImage : docker.io/rocm/gpu-operator:v1.2.0
35- createdAt : " 2025-06-12T00:51:00Z "
35+ createdAt : " 2025-07-17T08:55:25Z "
3636 description : |-
3737 Operator responsible for deploying AMD GPU kernel drivers, device plugin, device test runner and device metrics exporter
3838 For more information, visit [documentation](https://instinct.docs.amd.com/projects/gpu-operator/en/latest/)
@@ -598,6 +598,28 @@ spec:
598598 path : metricsExporter.upgradePolicy.upgradeStrategy
599599 x-descriptors :
600600 - urn:alm:descriptor:com.amd.deviceconfigs:upgradeStrategy
601+ - description : remediation workflow
602+ displayName : RemediationWorkflow
603+ path : remediationWorkflow
604+ x-descriptors :
605+ - urn:alm:descriptor:com.amd.deviceconfigs:remediationWorkflow
606+ - description : Name of the ConfigMap that holds condition-to-workflow mappings.
607+ displayName : ConditionalWorkflows
608+ path : remediationWorkflow.conditionalWorkflows
609+ x-descriptors :
610+ - urn:alm:descriptor:com.amd.deviceconfigs:conditionalWorkflows
611+ - description : Enable remediation workflows. Disabled by default; enable if the operator
612+ should automatically handle remediation of a node in case of GPU issues.
613+ displayName : Enable
614+ path : remediationWorkflow.enable
615+ x-descriptors :
616+ - urn:alm:descriptor:com.amd.deviceconfigs:enable
617+ - description : Time to live, in hours, for the Argo workflow object and its pods for a failed
618+ workflow. By default, it is set to 24 hours.
619+ displayName : TtlForFailedWorkflows
620+ path : remediationWorkflow.ttlForFailedWorkflows
621+ x-descriptors :
622+ - urn:alm:descriptor:com.amd.deviceconfigs:ttlForFailedWorkflows
601623 - description : Selector describes on which nodes the GPU Operator should enable
602624 the GPU device.
603625 displayName : Selector
0 commit comments