From c9a4b05345fde898515880c6078a4c9816235df1 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Tue, 9 Jun 2026 13:14:35 -0400 Subject: [PATCH 01/20] feat(ibmcloud): add GitHub Actions runner support for IBM Power and IBM Z --- cmd/mapt/cmd/ibmcloud/hosts/ibm-power.go | 7 +++- cmd/mapt/cmd/ibmcloud/hosts/ibm-z.go | 7 +++- cmd/mapt/cmd/params/params.go | 28 ++++++++++----- pkg/integrations/github/ghrunner.go | 30 ++++++++++++---- .../github/snippet-linux-ppc64le.sh | 22 ++++++++++++ .../github/snippet-linux-s390x.sh | 22 ++++++++++++ pkg/integrations/github/types.go | 23 +++++++------ pkg/integrations/integrations.go | 21 ++++++------ .../ibmcloud/action/ibm-power/cloud-config | 10 ++++++ .../ibmcloud/action/ibm-power/ibm-power.go | 31 ++++++++++++----- .../action/ibm-power/ibm-power_test.go | 6 ++-- .../ibmcloud/action/ibm-z/cloud-config | 12 ++++++- pkg/provider/ibmcloud/action/ibm-z/ibm-z.go | 34 ++++++++++++++----- .../ibmcloud/action/ibm-z/ibm-z_test.go | 6 ++-- 14 files changed, 197 insertions(+), 62 deletions(-) create mode 100644 pkg/integrations/github/snippet-linux-ppc64le.sh create mode 100644 pkg/integrations/github/snippet-linux-s390x.sh diff --git a/cmd/mapt/cmd/ibmcloud/hosts/ibm-power.go b/cmd/mapt/cmd/ibmcloud/hosts/ibm-power.go index b62855dec..b1a743826 100644 --- a/cmd/mapt/cmd/ibmcloud/hosts/ibm-power.go +++ b/cmd/mapt/cmd/ibmcloud/hosts/ibm-power.go @@ -2,6 +2,7 @@ package hosts import ( "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" + "github.com/redhat-developer/mapt/pkg/integrations/github" "github.com/redhat-developer/mapt/pkg/integrations/gitlab" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" ibmpower "github.com/redhat-developer/mapt/pkg/provider/ibmcloud/action/ibm-power" @@ -43,6 +44,10 @@ func ibmPowerCreate() *cobra.Command { if err := viper.BindPFlags(cmd.Flags()); err != nil { return err } + ghRunnerArgs := params.GithubRunnerArgs() + if ghRunnerArgs != nil { + ghRunnerArgs.Arch = &github.Ppc64le + } return ibmpower.New( &maptContext.ContextArgs{ Context: cmd.Context(), @@ -52,7 +57,7 @@ func ibmPowerCreate() *cobra.Command { Debug: viper.IsSet(params.Debug), DebugLevel: viper.GetUint(params.DebugLevel), CirrusPWArgs: params.CirrusPersistentWorkerArgs(), - GHRunnerArgs: params.GithubRunnerArgs(), + GHRunnerArgs: ghRunnerArgs, GLRunnerArgs: params.GitLabRunnerArgs(&gitlab.Ppc64le), Tags: viper.GetStringMapString(params.Tags), }, diff --git a/cmd/mapt/cmd/ibmcloud/hosts/ibm-z.go b/cmd/mapt/cmd/ibmcloud/hosts/ibm-z.go index 31a0bebed..a2ba0be4c 100644 --- a/cmd/mapt/cmd/ibmcloud/hosts/ibm-z.go +++ b/cmd/mapt/cmd/ibmcloud/hosts/ibm-z.go @@ -2,6 +2,7 @@ package hosts import ( "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" + "github.com/redhat-developer/mapt/pkg/integrations/github" "github.com/redhat-developer/mapt/pkg/integrations/gitlab" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" ibmz "github.com/redhat-developer/mapt/pkg/provider/ibmcloud/action/ibm-z" @@ -43,6 +44,10 @@ func ibmZCreate() *cobra.Command { if err := viper.BindPFlags(cmd.Flags()); err != nil { return err } + ghRunnerArgs := params.GithubRunnerArgs() + if ghRunnerArgs != nil { + ghRunnerArgs.Arch = &github.S390x + } return ibmz.New( &maptContext.ContextArgs{ Context: cmd.Context(), @@ -52,7 +57,7 @@ func ibmZCreate() *cobra.Command { Debug: viper.IsSet(params.Debug), DebugLevel: viper.GetUint(params.DebugLevel), CirrusPWArgs: params.CirrusPersistentWorkerArgs(), - GHRunnerArgs: params.GithubRunnerArgs(), + GHRunnerArgs: ghRunnerArgs, GLRunnerArgs: params.GitLabRunnerArgs(&gitlab.S390x), Tags: viper.GetStringMapString(params.Tags), }, diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index f6583a661..bf8caead4 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -73,9 +73,14 @@ const ( CreateCmdName string = "create" DestroyCmdName string = "destroy" - ghActionsRunnerToken string = "ghactions-runner-token" - ghActionsRunnerRepo string = "ghactions-runner-repo" - ghActionsRunnerLabels string = "ghactions-runner-labels" + ghActionsRunnerToken string = "ghactions-runner-token" + ghActionsRunnerRepo string = "ghactions-runner-repo" + ghActionsRunnerLabels string = "ghactions-runner-labels" + ghActionsRunnerImageRepo string = "ghactions-runner-image-repo" + // TODO: once the RHEL script is merged to https://github.com/IBM/action-runner-image-pz, + // switch default from deekay2310 fork to IBM upstream. + ghActionsRunnerImageRepoDefault string = "https://github.com/deekay2310/action-runner-image-pz.git" + GHActionsRunnerImageRepoDesc string = "Git clone URL for the action-runner-image-pz repository, used to build the GitHub Actions runner from source on ppc64le/s390x (no official binaries exist for these architectures)" cirrusPWToken string = "it-cirrus-pw-token" cirrusPWTokenDesc string = "Add mapt target as a cirrus persistent worker. The value will hold a valid token to be used by cirrus cli to join the project." @@ -278,17 +283,18 @@ func AddGHActionsFlags(fs *pflag.FlagSet) { fs.StringP(ghActionsRunnerToken, "", "", GHActionsRunnerTokenDesc) fs.StringP(ghActionsRunnerRepo, "", "", GHActionsRunnerRepoDesc) fs.StringSlice(ghActionsRunnerLabels, nil, GHActionsRunnerLabelsDesc) + fs.StringP(ghActionsRunnerImageRepo, "", ghActionsRunnerImageRepoDefault, GHActionsRunnerImageRepoDesc) } func GithubRunnerArgs() *github.GithubRunnerArgs { if viper.IsSet(ghActionsRunnerToken) { return &github.GithubRunnerArgs{ - Token: viper.GetString(ghActionsRunnerToken), - RepoURL: viper.GetString(ghActionsRunnerRepo), - Labels: viper.GetStringSlice(ghActionsRunnerLabels), - Platform: &github.Linux, - Arch: linuxArchAsGithubActionsArch( - viper.GetString(LinuxArch)), + Token: viper.GetString(ghActionsRunnerToken), + RepoURL: viper.GetString(ghActionsRunnerRepo), + Labels: viper.GetStringSlice(ghActionsRunnerLabels), + Platform: &github.Linux, + Arch: linuxArchAsGithubActionsArch(viper.GetString(LinuxArch)), + RunnerImageRepo: viper.GetString(ghActionsRunnerImageRepo), } } return nil @@ -359,6 +365,10 @@ func linuxArchAsGithubActionsArch(arch string) *github.Arch { switch arch { case "x86_64": return &github.Amd64 + case "ppc64le": + return &github.Ppc64le + case "s390x": + return &github.S390x } return &github.Arm64 } diff --git a/pkg/integrations/github/ghrunner.go b/pkg/integrations/github/ghrunner.go index ecb772b03..17a3b44f2 100644 --- a/pkg/integrations/github/ghrunner.go +++ b/pkg/integrations/github/ghrunner.go @@ -23,12 +23,23 @@ var snippetLinux []byte //go:embed snippet-windows.ps1 var snippetWindows []byte +//go:embed snippet-linux-ppc64le.sh +var snippetLinuxPpc64le []byte + +//go:embed snippet-linux-s390x.sh +var snippetLinuxS390x []byte + var snippets map[Platform][]byte = map[Platform][]byte{ Darwin: snippetDarwin, Linux: snippetLinux, Windows: snippetWindows, } +var archSnippets map[Arch][]byte = map[Arch][]byte{ + Ppc64le: snippetLinuxPpc64le, + S390x: snippetLinuxS390x, +} + var runnerArgs *GithubRunnerArgs func Init(args *GithubRunnerArgs) { @@ -40,17 +51,22 @@ func (args *GithubRunnerArgs) GetUserDataValues() *integrations.UserDataValues { return nil } return &integrations.UserDataValues{ - Name: args.Name, - Token: args.Token, - Labels: getLabels(), - RepoURL: args.RepoURL, - CliURL: downloadURL(), + Name: args.Name, + Token: args.Token, + Labels: getLabels(), + RepoURL: args.RepoURL, + CliURL: downloadURL(), + RunnerImageRepo: args.RunnerImageRepo, } } func (args *GithubRunnerArgs) GetSetupScriptTemplate() string { - templateConfig := string(snippets[*runnerArgs.Platform][:]) - return templateConfig + if *runnerArgs.Platform == Linux && runnerArgs.Arch != nil { + if archSnippet, ok := archSnippets[*runnerArgs.Arch]; ok { + return string(archSnippet[:]) + } + } + return string(snippets[*runnerArgs.Platform][:]) } func GetRunnerArgs() *GithubRunnerArgs { diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh new file mode 100644 index 000000000..5c26bf1a1 --- /dev/null +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail + +git clone {{ .RunnerImageRepo }} /opt/action-runner-image-pz + +cd /opt/action-runner-image-pz +bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' + +cd /opt/runner-cache +export DOTNET_ROOT=/opt/dotnet +export PATH=$PATH:$DOTNET_ROOT + +./config.sh \ + --unattended \ + --disableupdate \ + --ephemeral \ + --name "{{ .Name }}" \ + --labels "{{ .Labels }}" \ + --url "{{ .RepoURL }}" \ + --token "{{ .Token }}" + +nohup ./run.sh > /var/log/gh-runner.log 2>&1 & diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh new file mode 100644 index 000000000..f11e43e1b --- /dev/null +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail + +git clone {{ .RunnerImageRepo }} /opt/action-runner-image-pz + +cd /opt/action-runner-image-pz +bash -c '. scripts/vm.sh ubuntu 22.04 minimal --skip-snap-lxd' + +cd /opt/runner-cache +export DOTNET_ROOT=/opt/dotnet +export PATH=$PATH:$DOTNET_ROOT + +./config.sh \ + --unattended \ + --disableupdate \ + --ephemeral \ + --name "{{ .Name }}" \ + --labels "{{ .Labels }}" \ + --url "{{ .RepoURL }}" \ + --token "{{ .Token }}" + +nohup ./run.sh > /var/log/gh-runner.log 2>&1 & diff --git a/pkg/integrations/github/types.go b/pkg/integrations/github/types.go index ccc8974d3..192d96c98 100644 --- a/pkg/integrations/github/types.go +++ b/pkg/integrations/github/types.go @@ -8,17 +8,20 @@ var ( Linux Platform = "linux" Darwin Platform = "osx" - Arm64 Arch = "arm64" - Amd64 Arch = "x64" - Arm Arch = "arm" + Arm64 Arch = "arm64" + Amd64 Arch = "x64" + Arm Arch = "arm" + Ppc64le Arch = "ppc64le" + S390x Arch = "s390x" ) type GithubRunnerArgs struct { - Token string - RepoURL string - Name string - Platform *Platform - Arch *Arch - Labels []string - User string + Token string + RepoURL string + Name string + Platform *Platform + Arch *Arch + Labels []string + User string + RunnerImageRepo string } diff --git a/pkg/integrations/integrations.go b/pkg/integrations/integrations.go index 7b59ceef7..b8d9394f5 100644 --- a/pkg/integrations/integrations.go +++ b/pkg/integrations/integrations.go @@ -6,16 +6,17 @@ import ( ) type UserDataValues struct { - CliURL string - User string - Name string - Token string - Labels string - Port string - RepoURL string - Executor string - Unsecure bool - Concurrent int + CliURL string + User string + Name string + Token string + Labels string + Port string + RepoURL string + Executor string + Unsecure bool + Concurrent int + RunnerImageRepo string } type IntegrationConfig interface { diff --git a/pkg/provider/ibmcloud/action/ibm-power/cloud-config b/pkg/provider/ibmcloud/action/ibm-power/cloud-config index 7088603ee..18efe36cd 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/cloud-config +++ b/pkg/provider/ibmcloud/action/ibm-power/cloud-config @@ -80,6 +80,13 @@ write_files: content: | {{.GitLabRunnerScript}} {{- end}} +{{- if .GHActionsRunnerScript}} + - path: /opt/install-ghrunner.sh + permissions: '0700' + owner: root:root + content: | +{{.GHActionsRunnerScript}} +{{- end}} runcmd: - systemctl enable mount-data-home.service - dnf install -y git podman policycoreutils-python-utils @@ -98,3 +105,6 @@ runcmd: - mkdir -p /var/log/gitlab-runner - bash /opt/install-glrunner.sh {{- end}} +{{- if .GHActionsRunnerScript}} + - bash /opt/install-ghrunner.sh +{{- end}} diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index 0adde3b7c..a4618ae88 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -11,6 +11,7 @@ import ( "github.com/pulumi/pulumi/sdk/v3/go/auto" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" "github.com/redhat-developer/mapt/pkg/integrations" + "github.com/redhat-developer/mapt/pkg/integrations/github" "github.com/redhat-developer/mapt/pkg/integrations/gitlab" "github.com/redhat-developer/mapt/pkg/integrations/otelcol" "github.com/redhat-developer/mapt/pkg/manager" @@ -29,9 +30,10 @@ import ( var CloudConfig []byte type userDataValues struct { - Gateway string - OtelColScript string - GitLabRunnerScript string + Gateway string + OtelColScript string + GitLabRunnerScript string + GHActionsRunnerScript string } const ( @@ -183,6 +185,15 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { } hasOtel := otelSet == 3 + ghRunnerScript := "" + if ghRunnerArgs := github.GetRunnerArgs(); ghRunnerArgs != nil { + s, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile(ghRunnerArgs, defaultUser) + if err != nil { + return err + } + ghRunnerScript = *s + } + var piUserDataInput pulumi.StringPtrInput glRunnerArgs := gitlab.GetRunnerArgs() if glRunnerArgs != nil { @@ -192,6 +203,7 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { } gateway := subnetInfo.Gateway localArgs := *glRunnerArgs + localGHScript := ghRunnerScript piUserDataInput = authToken.ApplyT(func(token string) (*string, error) { localArgs.AuthToken = token glSnippet, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile(&localArgs, defaultUser) @@ -202,7 +214,7 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { if hasOtel { otelArgs = r.otelArgs(true) } - ud, err := piUserData(gateway, otelArgs, *glSnippet) + ud, err := piUserData(gateway, otelArgs, *glSnippet, localGHScript) if err != nil { return nil, err } @@ -213,7 +225,7 @@ func (r *pwRequest) deploy(ctx *pulumi.Context) error { if hasOtel { otelArgs = r.otelArgs(false) } - ud, err := piUserData(subnetInfo.Gateway, otelArgs, "") + ud, err := piUserData(subnetInfo.Gateway, otelArgs, "", ghRunnerScript) if err != nil { return fmt.Errorf("failed to render user data: %w", err) } @@ -455,7 +467,7 @@ func (r *pwRequest) otelArgs(monitorGitLabRunner bool) *otelcol.OtelcolArgs { // piUserData renders the cloud-config template and returns it base64-encoded // for use as PiUserData on a PowerVS instance. -func piUserData(gateway string, otelArgs *otelcol.OtelcolArgs, glRunnerScript string) (string, error) { +func piUserData(gateway string, otelArgs *otelcol.OtelcolArgs, glRunnerScript, ghRunnerScript string) (string, error) { otelScript := "" if otelArgs != nil { s, err := otelcol.GetSnippetAsCloudInitWritableFile(otelArgs) @@ -466,9 +478,10 @@ func piUserData(gateway string, otelArgs *otelcol.OtelcolArgs, glRunnerScript st } script, err := file.Template( userDataValues{ - Gateway: gateway, - OtelColScript: otelScript, - GitLabRunnerScript: glRunnerScript, + Gateway: gateway, + OtelColScript: otelScript, + GitLabRunnerScript: glRunnerScript, + GHActionsRunnerScript: ghRunnerScript, }, string(CloudConfig)) if err != nil { diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power_test.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power_test.go index 12fee0b4d..d877bf8ec 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power_test.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power_test.go @@ -9,7 +9,7 @@ import ( ) func TestPiUserData_noRunner(t *testing.T) { - out, err := piUserData("10.0.0.1", nil, "") + out, err := piUserData("10.0.0.1", nil, "", "") if err != nil { t.Fatalf("piUserData returned error: %v", err) } @@ -31,7 +31,7 @@ func TestPiUserData_noRunner(t *testing.T) { func TestPiUserData_withRunner(t *testing.T) { script := " #!/bin/bash\n echo hello" - out, err := piUserData("10.0.0.1", nil, script) + out, err := piUserData("10.0.0.1", nil, script, "") if err != nil { t.Fatalf("piUserData returned error: %v", err) } @@ -63,7 +63,7 @@ func TestPiUserData_withOtelAndRunner(t *testing.T) { SecurePath: "/var/log/secure", MonitorGitLabRunner: true, } - out, err := piUserData("10.0.0.1", args, script) + out, err := piUserData("10.0.0.1", args, script, "") if err != nil { t.Fatalf("piUserData returned error: %v", err) } diff --git a/pkg/provider/ibmcloud/action/ibm-z/cloud-config b/pkg/provider/ibmcloud/action/ibm-z/cloud-config index a1546c5d2..5f0bf1588 100644 --- a/pkg/provider/ibmcloud/action/ibm-z/cloud-config +++ b/pkg/provider/ibmcloud/action/ibm-z/cloud-config @@ -1,5 +1,5 @@ #cloud-config -{{- if or .OtelColScript .GitLabRunnerScript}} +{{- if or .OtelColScript .GitLabRunnerScript .GHActionsRunnerScript}} write_files: {{- if .OtelColScript}} - path: /opt/install-otelcol.sh @@ -33,6 +33,13 @@ write_files: content: | {{.GitLabRunnerScript}} {{- end}} +{{- if .GHActionsRunnerScript}} + - path: /opt/install-ghrunner.sh + permissions: '0700' + owner: root:root + content: | +{{.GHActionsRunnerScript}} +{{- end}} {{- end}} runcmd: - apt-get update -y @@ -44,3 +51,6 @@ runcmd: - mkdir -p /var/log/gitlab-runner - bash /opt/install-glrunner.sh {{- end}} +{{- if .GHActionsRunnerScript}} + - bash /opt/install-ghrunner.sh +{{- end}} diff --git a/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go b/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go index 432baf915..1e1dda372 100644 --- a/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go +++ b/pkg/provider/ibmcloud/action/ibm-z/ibm-z.go @@ -12,6 +12,7 @@ import ( "github.com/pulumi/pulumi/sdk/v3/go/auto" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" "github.com/redhat-developer/mapt/pkg/integrations" + "github.com/redhat-developer/mapt/pkg/integrations/github" "github.com/redhat-developer/mapt/pkg/integrations/gitlab" "github.com/redhat-developer/mapt/pkg/integrations/otelcol" "github.com/redhat-developer/mapt/pkg/manager" @@ -31,8 +32,9 @@ import ( var CloudConfig []byte type userDataValues struct { - OtelColScript string - GitLabRunnerScript string + OtelColScript string + GitLabRunnerScript string + GHActionsRunnerScript string } const ( @@ -360,8 +362,19 @@ func (r *zRequest) buildUserDataInput() (pulumi.StringPtrInput, error) { return nil, fmt.Errorf("partial otel configuration: --otel-app-code, --otel-auth-token, and --otel-index must all be set together") } hasOtel := otelSet == 3 + + ghRunnerScript := "" + if ghRunnerArgs := github.GetRunnerArgs(); ghRunnerArgs != nil { + s, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile(ghRunnerArgs, defaultUser) + if err != nil { + return nil, err + } + ghRunnerScript = *s + } + if r.glAuthToken != nil { localArgs := *r.glRunnerArgsCopy + localGHScript := ghRunnerScript return r.glAuthToken.ApplyT(func(token string) (*string, error) { localArgs.AuthToken = token glSnippet, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile(&localArgs, defaultUser) @@ -372,15 +385,19 @@ func (r *zRequest) buildUserDataInput() (pulumi.StringPtrInput, error) { if hasOtel { otelArgs = r.otelArgs(true) } - ud, err := izUserData(otelArgs, *glSnippet) + ud, err := izUserData(otelArgs, *glSnippet, localGHScript) if err != nil { return nil, err } return &ud, nil }).(pulumi.StringPtrOutput), nil } - if hasOtel { - ud, err := izUserData(r.otelArgs(false), "") + if hasOtel || ghRunnerScript != "" { + var otelArgs *otelcol.OtelcolArgs + if hasOtel { + otelArgs = r.otelArgs(false) + } + ud, err := izUserData(otelArgs, "", ghRunnerScript) if err != nil { return nil, fmt.Errorf("failed to render user data: %w", err) } @@ -403,7 +420,7 @@ func (r *zRequest) otelArgs(monitorGitLabRunner bool) *otelcol.OtelcolArgs { } } -func izUserData(otelArgs *otelcol.OtelcolArgs, glRunnerScript string) (string, error) { +func izUserData(otelArgs *otelcol.OtelcolArgs, glRunnerScript, ghRunnerScript string) (string, error) { otelScript := "" if otelArgs != nil { s, err := otelcol.GetSnippetAsCloudInitWritableFile(otelArgs) @@ -414,8 +431,9 @@ func izUserData(otelArgs *otelcol.OtelcolArgs, glRunnerScript string) (string, e } script, err := file.Template( userDataValues{ - OtelColScript: otelScript, - GitLabRunnerScript: glRunnerScript, + OtelColScript: otelScript, + GitLabRunnerScript: glRunnerScript, + GHActionsRunnerScript: ghRunnerScript, }, string(CloudConfig)) if err != nil { diff --git a/pkg/provider/ibmcloud/action/ibm-z/ibm-z_test.go b/pkg/provider/ibmcloud/action/ibm-z/ibm-z_test.go index b952fdebc..c2b84be7f 100644 --- a/pkg/provider/ibmcloud/action/ibm-z/ibm-z_test.go +++ b/pkg/provider/ibmcloud/action/ibm-z/ibm-z_test.go @@ -38,7 +38,7 @@ func decodeIzOutput(t *testing.T, out string) string { } func TestIzUserData_noRunner(t *testing.T) { - out, err := izUserData(nil, "") + out, err := izUserData(nil, "", "") if err != nil { t.Fatalf("izUserData returned error: %v", err) } @@ -56,7 +56,7 @@ func TestIzUserData_noRunner(t *testing.T) { func TestIzUserData_withRunner(t *testing.T) { script := " #!/bin/bash\n echo hello" - out, err := izUserData(nil, script) + out, err := izUserData(nil, script, "") if err != nil { t.Fatalf("izUserData returned error: %v", err) } @@ -81,7 +81,7 @@ func TestIzUserData_withOtelAndRunner(t *testing.T) { SecurePath: "/var/log/auth.log", MonitorGitLabRunner: true, } - out, err := izUserData(args, script) + out, err := izUserData(args, script, "") if err != nil { t.Fatalf("izUserData returned error: %v", err) } From 9af3e13553524ead564110558fce0c9f854707ad Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Tue, 9 Jun 2026 15:54:37 -0400 Subject: [PATCH 02/20] fix: harden --ghactions-runner-image-repo input - Quote the URL in snippet git clone commands to prevent shell injection - Add --depth=1 to limit clone exposure and speed up provisioning - Validate that only HTTPS URLs are accepted for the runner image repo Co-Authored-By: Claude Opus 4.6 --- cmd/mapt/cmd/params/params.go | 19 ++++++++++++++++++- .../github/snippet-linux-ppc64le.sh | 2 +- .../github/snippet-linux-s390x.sh | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index bf8caead4..53762a0f7 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -1,6 +1,9 @@ package params import ( + "fmt" + "strings" + "github.com/redhat-developer/mapt/pkg/integrations/cirrus" "github.com/redhat-developer/mapt/pkg/integrations/github" "github.com/redhat-developer/mapt/pkg/integrations/gitlab" @@ -288,18 +291,32 @@ func AddGHActionsFlags(fs *pflag.FlagSet) { func GithubRunnerArgs() *github.GithubRunnerArgs { if viper.IsSet(ghActionsRunnerToken) { + imageRepo := viper.GetString(ghActionsRunnerImageRepo) + if imageRepo != "" { + if err := validateRunnerImageRepo(imageRepo); err != nil { + logging.Errorf("invalid --ghactions-runner-image-repo: %v", err) + return nil + } + } return &github.GithubRunnerArgs{ Token: viper.GetString(ghActionsRunnerToken), RepoURL: viper.GetString(ghActionsRunnerRepo), Labels: viper.GetStringSlice(ghActionsRunnerLabels), Platform: &github.Linux, Arch: linuxArchAsGithubActionsArch(viper.GetString(LinuxArch)), - RunnerImageRepo: viper.GetString(ghActionsRunnerImageRepo), + RunnerImageRepo: imageRepo, } } return nil } +func validateRunnerImageRepo(repo string) error { + if !strings.HasPrefix(repo, "https://") { + return fmt.Errorf("only HTTPS URLs are allowed, got: %s", repo) + } + return nil +} + func AddCirrusFlags(fs *pflag.FlagSet) { fs.StringP(cirrusPWToken, "", "", cirrusPWTokenDesc) fs.StringToStringP(cirrusPWLabels, "", nil, cirrusPWLabelsDesc) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 5c26bf1a1..7d32e80d5 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -git clone {{ .RunnerImageRepo }} /opt/action-runner-image-pz +git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index f11e43e1b..7dc116481 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -git clone {{ .RunnerImageRepo }} /opt/action-runner-image-pz +git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz bash -c '. scripts/vm.sh ubuntu 22.04 minimal --skip-snap-lxd' From f68b50986a3b7661e7fc39c6f6e623adc8a9bc97 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Tue, 9 Jun 2026 16:40:43 -0400 Subject: [PATCH 03/20] feat(ibmcloud): auto-generate GitHub Actions runner registration token --- cmd/mapt/cmd/params/params.go | 52 +++++++++++++++++++-------- pkg/integrations/github/api.go | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 15 deletions(-) create mode 100644 pkg/integrations/github/api.go diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index 53762a0f7..b543762b0 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -2,6 +2,7 @@ package params import ( "fmt" + "os" "strings" "github.com/redhat-developer/mapt/pkg/integrations/cirrus" @@ -290,24 +291,45 @@ func AddGHActionsFlags(fs *pflag.FlagSet) { } func GithubRunnerArgs() *github.GithubRunnerArgs { - if viper.IsSet(ghActionsRunnerToken) { - imageRepo := viper.GetString(ghActionsRunnerImageRepo) - if imageRepo != "" { - if err := validateRunnerImageRepo(imageRepo); err != nil { - logging.Errorf("invalid --ghactions-runner-image-repo: %v", err) - return nil - } + token := viper.GetString(ghActionsRunnerToken) + repoURL := viper.GetString(ghActionsRunnerRepo) + pat := os.Getenv("GITHUB_TOKEN") + + if token == "" && pat == "" { + return nil + } + + if token == "" && repoURL == "" { + logging.Error("--ghactions-runner-repo is required for GitHub Actions runner setup") + return nil + } + + if token == "" { + logging.Info("no --ghactions-runner-token provided, auto-generating from GITHUB_TOKEN") + var err error + token, err = github.GenerateRegistrationToken(pat, repoURL) + if err != nil { + logging.Errorf("failed to auto-generate runner registration token: %v", err) + return nil } - return &github.GithubRunnerArgs{ - Token: viper.GetString(ghActionsRunnerToken), - RepoURL: viper.GetString(ghActionsRunnerRepo), - Labels: viper.GetStringSlice(ghActionsRunnerLabels), - Platform: &github.Linux, - Arch: linuxArchAsGithubActionsArch(viper.GetString(LinuxArch)), - RunnerImageRepo: imageRepo, + logging.Info("runner registration token generated successfully") + } + + imageRepo := viper.GetString(ghActionsRunnerImageRepo) + if imageRepo != "" { + if err := validateRunnerImageRepo(imageRepo); err != nil { + logging.Errorf("invalid --ghactions-runner-image-repo: %v", err) + return nil } } - return nil + return &github.GithubRunnerArgs{ + Token: token, + RepoURL: repoURL, + Labels: viper.GetStringSlice(ghActionsRunnerLabels), + Platform: &github.Linux, + Arch: linuxArchAsGithubActionsArch(viper.GetString(LinuxArch)), + RunnerImageRepo: imageRepo, + } } func validateRunnerImageRepo(repo string) error { diff --git a/pkg/integrations/github/api.go b/pkg/integrations/github/api.go new file mode 100644 index 000000000..ff66d844c --- /dev/null +++ b/pkg/integrations/github/api.go @@ -0,0 +1,66 @@ +package github + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "strings" +) + +type registrationTokenResponse struct { + Token string `json:"token"` + ExpiresAt string `json:"expires_at"` +} + +// GenerateRegistrationToken calls the GitHub API to create a short-lived +// runner registration token for the given repository. +// pat is a Personal Access Token with repo admin scope. +// repoURL is in the form "owner/repo" or "https://github.com/owner/repo". +func GenerateRegistrationToken(pat, repoURL string) (string, error) { + ownerRepo := repoURL + ownerRepo = strings.TrimPrefix(ownerRepo, "https://github.com/") + ownerRepo = strings.TrimPrefix(ownerRepo, "http://github.com/") + ownerRepo = strings.TrimSuffix(ownerRepo, "/") + + parts := strings.Split(ownerRepo, "/") + if len(parts) != 2 || parts[0] == "" || parts[1] == "" { + return "", fmt.Errorf("invalid repo format %q, expected owner/repo", repoURL) + } + + url := fmt.Sprintf("https://api.github.com/repos/%s/%s/actions/runners/registration-token", parts[0], parts[1]) + + req, err := http.NewRequest(http.MethodPost, url, nil) + if err != nil { + return "", fmt.Errorf("creating request: %w", err) + } + req.Header.Set("Authorization", "token "+pat) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", fmt.Errorf("calling GitHub API: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("reading response: %w", err) + } + + if resp.StatusCode != http.StatusCreated { + return "", fmt.Errorf("GitHub API returned %d: %s (ensure GITHUB_TOKEN has admin scope on the repo)", resp.StatusCode, string(body)) + } + + var tokenResp registrationTokenResponse + if err := json.Unmarshal(body, &tokenResp); err != nil { + return "", fmt.Errorf("parsing response: %w", err) + } + + if tokenResp.Token == "" { + return "", fmt.Errorf("empty token in GitHub API response") + } + + return tokenResp.Token, nil +} From 68f31f449f820b1dc3b0332bd8edfeb1ae2d7854 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 10 Jun 2026 17:21:56 -0400 Subject: [PATCH 04/20] fix(ibmcloud): install prerequisites before runner image build --- pkg/integrations/github/snippet-linux-ppc64le.sh | 2 ++ pkg/integrations/github/snippet-linux-s390x.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 7d32e80d5..26d513a73 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -euo pipefail +dnf install -y git-core + git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index 7dc116481..49323f30c 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -euo pipefail +apt-get update -y && apt-get install -y software-properties-common + git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz From 78d423b7f65376bd4eb512746b13a7898fa4e962 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 10 Jun 2026 18:07:41 -0400 Subject: [PATCH 05/20] ci: add s390x runner smoke test workflow Co-Authored-By: Claude Opus 4.6 --- .github/workflows/smoke-test-s390x.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/smoke-test-s390x.yaml diff --git a/.github/workflows/smoke-test-s390x.yaml b/.github/workflows/smoke-test-s390x.yaml new file mode 100644 index 000000000..41495913e --- /dev/null +++ b/.github/workflows/smoke-test-s390x.yaml @@ -0,0 +1,11 @@ +name: s390x Runner Smoke Test +on: workflow_dispatch +jobs: + smoke-test: + runs-on: [self-hosted, S390X] + steps: + - name: Check architecture + run: | + echo "Architecture: $(uname -m)" + cat /etc/os-release | grep PRETTY_NAME + echo "Runner is alive on $(arch)" From 753b7efe35936ee7bb147538e85a4abc9dc7cdfc Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 10 Jun 2026 18:16:32 -0400 Subject: [PATCH 06/20] fix(ibmcloud): tolerate flaky upstream test failures in runner build --- pkg/integrations/github/snippet-linux-ppc64le.sh | 8 +++++++- pkg/integrations/github/snippet-linux-s390x.sh | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 26d513a73..16348fe3c 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -6,7 +6,13 @@ dnf install -y git-core git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz -bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' +# Allow build to continue past flaky upstream test failures +bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' || true + +if [ ! -f /opt/runner-cache/config.sh ]; then + echo "Runner binary not found after build — check build logs" >&2 + exit 1 +fi cd /opt/runner-cache export DOTNET_ROOT=/opt/dotnet diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index 49323f30c..60dadbc67 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -6,7 +6,13 @@ apt-get update -y && apt-get install -y software-properties-common git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz -bash -c '. scripts/vm.sh ubuntu 22.04 minimal --skip-snap-lxd' +# Allow build to continue past flaky upstream test failures +bash -c '. scripts/vm.sh ubuntu 22.04 minimal --skip-snap-lxd' || true + +if [ ! -f /opt/runner-cache/config.sh ]; then + echo "Runner binary not found after build — check build logs" >&2 + exit 1 +fi cd /opt/runner-cache export DOTNET_ROOT=/opt/dotnet From 9e03696e8ee6b0253aaa0b66989e7f6107eb51d1 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 10 Jun 2026 22:15:53 -0400 Subject: [PATCH 07/20] fix(ibmcloud): preserve runner binary if post-build installer fails --- pkg/integrations/github/snippet-linux-ppc64le.sh | 12 +++++++++++- pkg/integrations/github/snippet-linux-s390x.sh | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 16348fe3c..ac91cd369 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -6,8 +6,18 @@ dnf install -y git-core git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz -# Allow build to continue past flaky upstream test failures +# Snapshot the runner binary as soon as it is built; a later installer +# failure (e.g. Docker GPG key) can trigger cleanup that deletes it. +(while [ ! -f /opt/runner-cache/config.sh ]; do sleep 10; done + cp -a /opt/runner-cache /opt/runner-backup) & +WATCHER_PID=$! + bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' || true +kill $WATCHER_PID 2>/dev/null || true + +if [ ! -f /opt/runner-cache/config.sh ] && [ -d /opt/runner-backup ]; then + mv /opt/runner-backup /opt/runner-cache +fi if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index 60dadbc67..61da42b9e 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -6,8 +6,18 @@ apt-get update -y && apt-get install -y software-properties-common git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz -# Allow build to continue past flaky upstream test failures +# Snapshot the runner binary as soon as it is built; a later installer +# failure (e.g. Docker GPG key) can trigger cleanup that deletes it. +(while [ ! -f /opt/runner-cache/config.sh ]; do sleep 10; done + cp -a /opt/runner-cache /opt/runner-backup) & +WATCHER_PID=$! + bash -c '. scripts/vm.sh ubuntu 22.04 minimal --skip-snap-lxd' || true +kill $WATCHER_PID 2>/dev/null || true + +if [ ! -f /opt/runner-cache/config.sh ] && [ -d /opt/runner-backup ]; then + mv /opt/runner-backup /opt/runner-cache +fi if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 From 782906200033af50c0c9e1497d00685d9be4c927 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 10 Jun 2026 22:30:13 -0400 Subject: [PATCH 08/20] revert: remove unnecessary runner binary watcher from snippets --- pkg/integrations/github/snippet-linux-ppc64le.sh | 12 +----------- pkg/integrations/github/snippet-linux-s390x.sh | 12 +----------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index ac91cd369..16348fe3c 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -6,18 +6,8 @@ dnf install -y git-core git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz -# Snapshot the runner binary as soon as it is built; a later installer -# failure (e.g. Docker GPG key) can trigger cleanup that deletes it. -(while [ ! -f /opt/runner-cache/config.sh ]; do sleep 10; done - cp -a /opt/runner-cache /opt/runner-backup) & -WATCHER_PID=$! - +# Allow build to continue past flaky upstream test failures bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' || true -kill $WATCHER_PID 2>/dev/null || true - -if [ ! -f /opt/runner-cache/config.sh ] && [ -d /opt/runner-backup ]; then - mv /opt/runner-backup /opt/runner-cache -fi if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index 61da42b9e..60dadbc67 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -6,18 +6,8 @@ apt-get update -y && apt-get install -y software-properties-common git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz -# Snapshot the runner binary as soon as it is built; a later installer -# failure (e.g. Docker GPG key) can trigger cleanup that deletes it. -(while [ ! -f /opt/runner-cache/config.sh ]; do sleep 10; done - cp -a /opt/runner-cache /opt/runner-backup) & -WATCHER_PID=$! - +# Allow build to continue past flaky upstream test failures bash -c '. scripts/vm.sh ubuntu 22.04 minimal --skip-snap-lxd' || true -kill $WATCHER_PID 2>/dev/null || true - -if [ ! -f /opt/runner-cache/config.sh ] && [ -d /opt/runner-backup ]; then - mv /opt/runner-backup /opt/runner-cache -fi if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 From a6893f04c899c6e1b878a3a1de20b044ce7a28a2 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 10 Jun 2026 23:02:46 -0400 Subject: [PATCH 09/20] fix(ibmcloud): run GitHub Actions runner as non-root user --- .../github/snippet-linux-ppc64le.sh | 33 +++++++++++-------- .../github/snippet-linux-s390x.sh | 33 +++++++++++-------- 2 files changed, 38 insertions(+), 28 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 16348fe3c..d54eba29e 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -14,17 +14,22 @@ if [ ! -f /opt/runner-cache/config.sh ]; then exit 1 fi -cd /opt/runner-cache -export DOTNET_ROOT=/opt/dotnet -export PATH=$PATH:$DOTNET_ROOT - -./config.sh \ - --unattended \ - --disableupdate \ - --ephemeral \ - --name "{{ .Name }}" \ - --labels "{{ .Labels }}" \ - --url "{{ .RepoURL }}" \ - --token "{{ .Token }}" - -nohup ./run.sh > /var/log/gh-runner.log 2>&1 & +id -u runner &>/dev/null || useradd -m -s /bin/bash runner +chown -R runner:runner /opt/runner-cache /opt/dotnet + +sudo -u runner bash -c ' + cd /opt/runner-cache + export DOTNET_ROOT=/opt/dotnet + export PATH=$PATH:$DOTNET_ROOT + + ./config.sh \ + --unattended \ + --disableupdate \ + --ephemeral \ + --name "{{ .Name }}" \ + --labels "{{ .Labels }}" \ + --url "{{ .RepoURL }}" \ + --token "{{ .Token }}" + + nohup ./run.sh > /tmp/gh-runner.log 2>&1 & +' diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index 60dadbc67..22bd2982d 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -14,17 +14,22 @@ if [ ! -f /opt/runner-cache/config.sh ]; then exit 1 fi -cd /opt/runner-cache -export DOTNET_ROOT=/opt/dotnet -export PATH=$PATH:$DOTNET_ROOT - -./config.sh \ - --unattended \ - --disableupdate \ - --ephemeral \ - --name "{{ .Name }}" \ - --labels "{{ .Labels }}" \ - --url "{{ .RepoURL }}" \ - --token "{{ .Token }}" - -nohup ./run.sh > /var/log/gh-runner.log 2>&1 & +id -u runner &>/dev/null || useradd -m -s /bin/bash runner +chown -R runner:runner /opt/runner-cache /opt/dotnet + +sudo -u runner bash -c ' + cd /opt/runner-cache + export DOTNET_ROOT=/opt/dotnet + export PATH=$PATH:$DOTNET_ROOT + + ./config.sh \ + --unattended \ + --disableupdate \ + --ephemeral \ + --name "{{ .Name }}" \ + --labels "{{ .Labels }}" \ + --url "{{ .RepoURL }}" \ + --token "{{ .Token }}" + + nohup ./run.sh > /tmp/gh-runner.log 2>&1 & +' From 60e93b8a80b11296d2b5bcc7687c719e581f3b19 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Thu, 11 Jun 2026 14:38:55 -0400 Subject: [PATCH 10/20] fix(ibmcloud): repair PAM config after runner build to preserve sshd The upstream configure-limits.sh appends duplicate pam_limits.so entries to system-auth and password-auth, causing sshd to drop connections before sending its banner. Deduplicate PAM entries and restart sshd after build. Co-Authored-By: Claude Opus 4.6 --- pkg/integrations/github/snippet-linux-ppc64le.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index d54eba29e..3f8bac531 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -9,6 +9,15 @@ cd /opt/action-runner-image-pz # Allow build to continue past flaky upstream test failures bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' || true +# The upstream configure-limits.sh appends duplicate pam_limits.so entries +# which breaks sshd (connection drops before banner). Deduplicate them. +for f in /etc/pam.d/system-auth /etc/pam.d/password-auth; do + if [ -f "$f" ]; then + awk '!seen[$0]++' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f" + fi +done +systemctl restart sshd 2>/dev/null || true + if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 exit 1 From ef87753009029b76a2a5bcf3dc21b57b1b4ec0be Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Thu, 11 Jun 2026 15:46:42 -0400 Subject: [PATCH 11/20] debug(ibmcloud): add sshd watchdog and diagnostic logging to ppc64le snippet Adds a background monitor that logs sshd status every 30s during the runner build to identify what breaks SSH. After build completion, dumps full sshd diagnostics (config test, journal, host key perms, crypto policies, PAM config) and attempts repair. Co-Authored-By: Claude Opus 4.6 --- .../github/snippet-linux-ppc64le.sh | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 3f8bac531..d8ab808d2 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -3,21 +3,51 @@ set -euo pipefail dnf install -y git-core +# Background sshd monitor: logs status every 30s to help diagnose build breakage +( + LOG=/var/log/sshd-watchdog.log + while true; do + echo "--- $(date) ---" >> "$LOG" + systemctl is-active sshd >> "$LOG" 2>&1 + ss -tlnp | grep :22 >> "$LOG" 2>&1 + sshd -T >> /var/log/sshd-configtest.log 2>&1 || echo "sshd -T FAILED (exit $?)" >> "$LOG" + sleep 30 + done +) & +WATCHDOG_PID=$! + git clone --depth=1 "{{ .RunnerImageRepo }}" /opt/action-runner-image-pz cd /opt/action-runner-image-pz # Allow build to continue past flaky upstream test failures bash -c '. scripts/vm.sh rhel 9 minimal --skip-snap-lxd' || true -# The upstream configure-limits.sh appends duplicate pam_limits.so entries -# which breaks sshd (connection drops before banner). Deduplicate them. +kill $WATCHDOG_PID 2>/dev/null || true + +echo "=== POST-BUILD SSHD DIAGNOSTICS ===" >> /var/log/sshd-watchdog.log +systemctl status sshd >> /var/log/sshd-watchdog.log 2>&1 +sshd -T >> /var/log/sshd-watchdog.log 2>&1 || echo "sshd -T FAILED" >> /var/log/sshd-watchdog.log +journalctl -u sshd --no-pager -n 50 >> /var/log/sshd-watchdog.log 2>&1 +ls -la /etc/ssh/ssh_host_* >> /var/log/sshd-watchdog.log 2>&1 +ls -la /usr/share/crypto-policies/ >> /var/log/sshd-watchdog.log 2>&1 +cat /etc/pam.d/system-auth >> /var/log/sshd-watchdog.log 2>&1 + +# Attempt sshd repair: fix PAM duplicates, restore permissions, restart for f in /etc/pam.d/system-auth /etc/pam.d/password-auth; do if [ -f "$f" ]; then awk '!seen[$0]++' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f" fi done +chmod 600 /etc/ssh/ssh_host_*_key 2>/dev/null || true +chmod 644 /etc/ssh/ssh_host_*_key.pub 2>/dev/null || true +find /usr/share/crypto-policies/ -type f -exec chmod 644 {} + 2>/dev/null || true +find /usr/share/crypto-policies/ -type d -exec chmod 755 {} + 2>/dev/null || true systemctl restart sshd 2>/dev/null || true +echo "=== POST-REPAIR SSHD STATUS ===" >> /var/log/sshd-watchdog.log +systemctl status sshd >> /var/log/sshd-watchdog.log 2>&1 +ss -tlnp | grep :22 >> /var/log/sshd-watchdog.log 2>&1 + if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 exit 1 From 265345e21be4c661f8e227404c380a616d9a4679 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Thu, 11 Jun 2026 16:46:55 -0400 Subject: [PATCH 12/20] fix(ibmcloud): restore sshd privsep dir after runner build + COS diagnostics The upstream configure-system.sh runs chmod -R 777 /usr/share which makes /usr/share/empty.sshd (sshd's privilege separation directory) world-writable. sshd refuses to start when this directory is not owned by root or is world-writable. Fix by restoring 755 after the build. Also adds sshd watchdog logging with COS upload so diagnostics are accessible even when SSH is broken. COS credentials are passed through cloud-config template variables. Co-Authored-By: Claude Opus 4.6 --- .../github/snippet-linux-ppc64le.sh | 53 +++++++++++++++++-- .../ibmcloud/action/ibm-power/cloud-config | 6 ++- .../ibmcloud/action/ibm-power/ibm-power.go | 7 +++ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index d8ab808d2..72b5d2f63 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -32,22 +32,65 @@ ls -la /etc/ssh/ssh_host_* >> /var/log/sshd-watchdog.log 2>&1 ls -la /usr/share/crypto-policies/ >> /var/log/sshd-watchdog.log 2>&1 cat /etc/pam.d/system-auth >> /var/log/sshd-watchdog.log 2>&1 -# Attempt sshd repair: fix PAM duplicates, restore permissions, restart +# The upstream configure-system.sh runs chmod -R 777 /usr/share which makes +# the sshd privilege separation directory world-writable. sshd refuses to +# start when /usr/share/empty.sshd is not owned by root or is world-writable. +chmod 755 /usr/share/empty.sshd 2>/dev/null || true +chown root:root /usr/share/empty.sshd 2>/dev/null || true +# Also fix PAM duplicates from configure-limits.sh for f in /etc/pam.d/system-auth /etc/pam.d/password-auth; do if [ -f "$f" ]; then awk '!seen[$0]++' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f" fi done -chmod 600 /etc/ssh/ssh_host_*_key 2>/dev/null || true -chmod 644 /etc/ssh/ssh_host_*_key.pub 2>/dev/null || true -find /usr/share/crypto-policies/ -type f -exec chmod 644 {} + 2>/dev/null || true -find /usr/share/crypto-policies/ -type d -exec chmod 755 {} + 2>/dev/null || true systemctl restart sshd 2>/dev/null || true echo "=== POST-REPAIR SSHD STATUS ===" >> /var/log/sshd-watchdog.log systemctl status sshd >> /var/log/sshd-watchdog.log 2>&1 ss -tlnp | grep :22 >> /var/log/sshd-watchdog.log 2>&1 +# Upload diagnostics to COS so we can read them without SSH +python3 -c " +import hashlib, hmac, urllib.request, datetime, os, socket +key_id = os.environ.get('COS_KEY_ID', '') +secret = os.environ.get('COS_SECRET', '') +endpoint = os.environ.get('COS_ENDPOINT', '') +bucket = 'mapt-test-bucket-evidence' +hostname = socket.gethostname() +obj = 'debug/' + hostname + '-sshd-watchdog.log' +if key_id and secret and endpoint: + with open('/var/log/sshd-watchdog.log', 'rb') as f: + body = f.read() + now = datetime.datetime.utcnow() + date_stamp = now.strftime('%Y%m%d') + amz_date = now.strftime('%Y%m%dT%H%M%SZ') + region = 'us-south' + service = 's3' + host = endpoint.replace('https://','').replace('http://','') + canonical_uri = '/' + bucket + '/' + obj + payload_hash = hashlib.sha256(body).hexdigest() + canonical_headers = 'host:' + host + '\n' + 'x-amz-content-sha256:' + payload_hash + '\n' + 'x-amz-date:' + amz_date + '\n' + signed_headers = 'host;x-amz-content-sha256;x-amz-date' + canonical_request = 'PUT\n' + canonical_uri + '\n\n' + canonical_headers + '\n' + signed_headers + '\n' + payload_hash + algorithm = 'AWS4-HMAC-SHA256' + credential_scope = date_stamp + '/' + region + '/' + service + '/aws4_request' + string_to_sign = algorithm + '\n' + amz_date + '\n' + credential_scope + '\n' + hashlib.sha256(canonical_request.encode()).hexdigest() + def sign(key, msg): + return hmac.new(key, msg.encode(), hashlib.sha256).digest() + signing_key = sign(sign(sign(sign(('AWS4' + secret).encode(), date_stamp), region), service), 'aws4_request') + signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest() + auth = algorithm + ' Credential=' + key_id + '/' + credential_scope + ', SignedHeaders=' + signed_headers + ', Signature=' + signature + req = urllib.request.Request(endpoint + canonical_uri, data=body, method='PUT') + req.add_header('x-amz-date', amz_date) + req.add_header('x-amz-content-sha256', payload_hash) + req.add_header('Authorization', auth) + req.add_header('Content-Type', 'text/plain') + urllib.request.urlopen(req) + print('Uploaded diagnostics to COS: ' + obj) +else: + print('COS credentials not set, skipping upload') +" 2>&1 || echo "COS upload failed" + if [ ! -f /opt/runner-cache/config.sh ]; then echo "Runner binary not found after build — check build logs" >&2 exit 1 diff --git a/pkg/provider/ibmcloud/action/ibm-power/cloud-config b/pkg/provider/ibmcloud/action/ibm-power/cloud-config index 18efe36cd..83982e7bd 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/cloud-config +++ b/pkg/provider/ibmcloud/action/ibm-power/cloud-config @@ -106,5 +106,9 @@ runcmd: - bash /opt/install-glrunner.sh {{- end}} {{- if .GHActionsRunnerScript}} - - bash /opt/install-ghrunner.sh + - | + export COS_KEY_ID="{{ .COSAccessKeyID }}" + export COS_SECRET="{{ .COSSecretAccessKey }}" + export COS_ENDPOINT="{{ .COSEndpoint }}" + bash /opt/install-ghrunner.sh {{- end}} diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index a4618ae88..ab2557560 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -4,6 +4,7 @@ import ( _ "embed" "encoding/base64" "fmt" + "os" "strings" "github.com/mapt-oss/pulumi-ibmcloud/sdk/go/ibmcloud" @@ -34,6 +35,9 @@ type userDataValues struct { OtelColScript string GitLabRunnerScript string GHActionsRunnerScript string + COSAccessKeyID string + COSSecretAccessKey string + COSEndpoint string } const ( @@ -482,6 +486,9 @@ func piUserData(gateway string, otelArgs *otelcol.OtelcolArgs, glRunnerScript, g OtelColScript: otelScript, GitLabRunnerScript: glRunnerScript, GHActionsRunnerScript: ghRunnerScript, + COSAccessKeyID: os.Getenv("IBMCLOUD_COS_ACCESS_KEY_ID"), + COSSecretAccessKey: os.Getenv("IBMCLOUD_COS_SECRET_ACCESS_KEY"), + COSEndpoint: os.Getenv("IBMCLOUD_COS_ENDPOINT"), }, string(CloudConfig)) if err != nil { From de8fc1f13186bce5b044814461fe3f103f175e42 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Thu, 11 Jun 2026 17:51:29 -0400 Subject: [PATCH 13/20] fix(ibmcloud): remove nonexistent /opt/dotnet from ppc64le snippet On RHEL 9/ppc64le, dotnet installs to /usr/lib64/dotnet via dnf, not /opt/dotnet. The GH runner is self-contained (uses ./bin/Runner.Listener) and does not need DOTNET_ROOT. The chown on /opt/dotnet caused cloud-init to fail after a successful build. Co-Authored-By: Claude Opus 4.6 --- pkg/integrations/github/snippet-linux-ppc64le.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-ppc64le.sh b/pkg/integrations/github/snippet-linux-ppc64le.sh index 72b5d2f63..bd5da65ad 100644 --- a/pkg/integrations/github/snippet-linux-ppc64le.sh +++ b/pkg/integrations/github/snippet-linux-ppc64le.sh @@ -97,12 +97,10 @@ if [ ! -f /opt/runner-cache/config.sh ]; then fi id -u runner &>/dev/null || useradd -m -s /bin/bash runner -chown -R runner:runner /opt/runner-cache /opt/dotnet +chown -R runner:runner /opt/runner-cache sudo -u runner bash -c ' cd /opt/runner-cache - export DOTNET_ROOT=/opt/dotnet - export PATH=$PATH:$DOTNET_ROOT ./config.sh \ --unattended \ From 9f02ad6d1910ac387ed26e759640d065c314b29e Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 17 Jun 2026 15:30:47 -0400 Subject: [PATCH 14/20] feat(ibmcloud): auto-discover Power VS system type based on pool capacity --- .../ibmcloud/action/ibm-power/ibm-power.go | 17 +- pkg/provider/ibmcloud/data/pisystempools.go | 169 ++++++++++++++++++ 2 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 pkg/provider/ibmcloud/data/pisystempools.go diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index ab2557560..ffe273337 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -119,6 +119,21 @@ func New(ctx *mc.ContextArgs, args *PWArgs) error { } prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main") + + sysTypeResult, err := icdata.SelectSystemType(mCtx, &icdata.SystemPoolRequirements{ + CloudInstanceId: args.WorkspaceID, + Memory: args.Memory, + Processors: args.Processors, + PreferredType: args.SysType, + }) + if err != nil { + return fmt.Errorf("system type selection failed: %w", err) + } + if !sysTypeResult.IsPreferred { + logging.Warnf("using system type %s instead of requested %s due to capacity constraints", + sysTypeResult.SelectedType, args.SysType) + } + r := &pwRequest{ mCtx: mCtx, prefix: &prefix, @@ -128,7 +143,7 @@ func New(ctx *mc.ContextArgs, args *PWArgs) error { memory: args.Memory, processors: args.Processors, procType: args.ProcType, - sysType: args.SysType, + sysType: sysTypeResult.SelectedType, storageType: args.StorageType, diskSize: args.DiskSize, otelAppCode: args.OtelAppCode, diff --git a/pkg/provider/ibmcloud/data/pisystempools.go b/pkg/provider/ibmcloud/data/pisystempools.go new file mode 100644 index 000000000..54126bebf --- /dev/null +++ b/pkg/provider/ibmcloud/data/pisystempools.go @@ -0,0 +1,169 @@ +package data + +import ( + "fmt" + "math" + "os" + "sort" + "strings" + + v "github.com/IBM-Cloud/power-go-client/clients/instance" + ps "github.com/IBM-Cloud/power-go-client/ibmpisession" + "github.com/IBM-Cloud/power-go-client/power/models" + "github.com/IBM/go-sdk-core/v5/core" + mc "github.com/redhat-developer/mapt/pkg/manager/context" + icConstants "github.com/redhat-developer/mapt/pkg/provider/ibmcloud/constants" + "github.com/redhat-developer/mapt/pkg/util/logging" +) + +type SystemPoolRequirements struct { + CloudInstanceId string + Memory float64 + Processors float64 + PreferredType string +} + +type SystemPoolResult struct { + SelectedType string + IsPreferred bool +} + +func SelectSystemType(mCtx *mc.Context, args *SystemPoolRequirements) (*SystemPoolResult, error) { + client, err := piSystemPoolsClient(mCtx, args.CloudInstanceId) + if err != nil { + return nil, fmt.Errorf("failed to create system pools client: %w", err) + } + pools, err := client.GetSystemPools() + if err != nil { + return nil, fmt.Errorf("failed to query system pools: %w", err) + } + if len(pools) == 0 { + return nil, fmt.Errorf("no system pools available in workspace %s", args.CloudInstanceId) + } + + reqCores := args.Processors + reqMemory := int64(math.Ceil(args.Memory)) + + logPoolSummary(pools) + + if pool, ok := pools[args.PreferredType]; ok { + if poolHasCapacity(&pool, reqCores, reqMemory) { + logging.Infof("system type %s has sufficient capacity (requested: %.1f cores, %d GiB memory)", + args.PreferredType, reqCores, reqMemory) + return &SystemPoolResult{SelectedType: args.PreferredType, IsPreferred: true}, nil + } + logging.Warnf("requested system type %s has insufficient capacity; searching for alternatives...", args.PreferredType) + } else { + logging.Warnf("requested system type %s not found in workspace; searching for alternatives...", args.PreferredType) + } + + type candidate struct { + name string + headroom int64 + } + var candidates []candidate + for name, pool := range pools { + if name == args.PreferredType { + continue + } + if poolHasCapacity(&pool, reqCores, reqMemory) { + candidates = append(candidates, candidate{ + name: name, + headroom: poolAvailableMemory(&pool) - reqMemory, + }) + } + } + + if len(candidates) == 0 { + return nil, fmt.Errorf( + "no system pool has sufficient capacity for %.1f cores and %d GiB memory in workspace %s\n%s", + reqCores, reqMemory, args.CloudInstanceId, poolCapacitySummary(pools)) + } + + sort.Slice(candidates, func(i, j int) bool { + return candidates[i].headroom > candidates[j].headroom + }) + + selected := candidates[0].name + logging.Infof("auto-selected system type %s (requested %s was unavailable)", selected, args.PreferredType) + return &SystemPoolResult{SelectedType: selected, IsPreferred: false}, nil +} + +func poolHasCapacity(pool *models.SystemPool, cores float64, memory int64) bool { + if pool.MaxCoresAvailable != nil { + if pool.MaxCoresAvailable.Cores != nil && *pool.MaxCoresAvailable.Cores >= cores && + pool.MaxCoresAvailable.Memory != nil && *pool.MaxCoresAvailable.Memory >= memory { + return true + } + } + if pool.MaxMemoryAvailable != nil { + if pool.MaxMemoryAvailable.Cores != nil && *pool.MaxMemoryAvailable.Cores >= cores && + pool.MaxMemoryAvailable.Memory != nil && *pool.MaxMemoryAvailable.Memory >= memory { + return true + } + } + if pool.MaxAvailable != nil { + return pool.MaxAvailable.Cores != nil && *pool.MaxAvailable.Cores >= cores && + pool.MaxAvailable.Memory != nil && *pool.MaxAvailable.Memory >= memory + } + return false +} + +func poolAvailableMemory(pool *models.SystemPool) int64 { + if pool.MaxAvailable != nil && pool.MaxAvailable.Memory != nil { + return *pool.MaxAvailable.Memory + } + return 0 +} + +func logPoolSummary(pools models.SystemPools) { + for name, pool := range pools { + cores := float64(0) + mem := int64(0) + if pool.MaxAvailable != nil { + if pool.MaxAvailable.Cores != nil { + cores = *pool.MaxAvailable.Cores + } + if pool.MaxAvailable.Memory != nil { + mem = *pool.MaxAvailable.Memory + } + } + logging.Infof(" system pool %-8s: max available %.1f cores, %d GiB memory", name, cores, mem) + } +} + +func poolCapacitySummary(pools models.SystemPools) string { + var lines []string + for name, pool := range pools { + cores := float64(0) + mem := int64(0) + if pool.MaxAvailable != nil { + if pool.MaxAvailable.Cores != nil { + cores = *pool.MaxAvailable.Cores + } + if pool.MaxAvailable.Memory != nil { + mem = *pool.MaxAvailable.Memory + } + } + lines = append(lines, fmt.Sprintf(" %-8s: %.1f cores, %d GiB memory available", name, cores, mem)) + } + sort.Strings(lines) + return strings.Join(lines, "\n") +} + +func piSystemPoolsClient(mCtx *mc.Context, cloudInstanceId string) (*v.IBMPISystemPoolClient, error) { + options := &ps.IBMPIOptions{ + Authenticator: &core.IamAuthenticator{ + ApiKey: os.Getenv(icConstants.EnvIBMCloudAPIKey), + }, + UserAccount: os.Getenv(icConstants.EnvIBMCloudAccount), + Zone: os.Getenv("IC_ZONE"), + URL: powerURL(os.Getenv("IC_REGION")), + Debug: mCtx.Debug(), + } + session, err := ps.NewIBMPISession(options) + if err != nil { + return nil, err + } + return v.NewIBMPISystemPoolClient(mCtx.Context(), session, cloudInstanceId), nil +} From 246c1c056edc6b69f09e5b803a2538ee8ec5eca1 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Wed, 17 Jun 2026 16:05:59 -0400 Subject: [PATCH 15/20] fix(ibmcloud): harden GH runner params and system pool selection --- cmd/mapt/cmd/params/params.go | 2 +- pkg/integrations/github/api.go | 4 +++- pkg/manager/context/context.go | 2 +- pkg/provider/ibmcloud/data/pisystempools.go | 17 +++++++++++++++-- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index b543762b0..006b339c8 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -299,7 +299,7 @@ func GithubRunnerArgs() *github.GithubRunnerArgs { return nil } - if token == "" && repoURL == "" { + if repoURL == "" { logging.Error("--ghactions-runner-repo is required for GitHub Actions runner setup") return nil } diff --git a/pkg/integrations/github/api.go b/pkg/integrations/github/api.go index ff66d844c..a592cfc60 100644 --- a/pkg/integrations/github/api.go +++ b/pkg/integrations/github/api.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "strings" + "time" ) type registrationTokenResponse struct { @@ -38,7 +39,8 @@ func GenerateRegistrationToken(pat, repoURL string) (string, error) { req.Header.Set("Accept", "application/vnd.github+json") req.Header.Set("X-GitHub-Api-Version", "2022-11-28") - resp, err := http.DefaultClient.Do(req) + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("calling GitHub API: %w", err) } diff --git a/pkg/manager/context/context.go b/pkg/manager/context/context.go index bf21c5e33..d41643f00 100644 --- a/pkg/manager/context/context.go +++ b/pkg/manager/context/context.go @@ -183,7 +183,7 @@ func addCommonTags(c *Context) { func manageIntegration(c *Context, ca *ContextArgs) error { if ca.GHRunnerArgs != nil { - ca.GHRunnerArgs.Name = c.RunID() + ca.GHRunnerArgs.Name = c.ProjectName() github.Init(ca.GHRunnerArgs) } if ca.CirrusPWArgs != nil { diff --git a/pkg/provider/ibmcloud/data/pisystempools.go b/pkg/provider/ibmcloud/data/pisystempools.go index 54126bebf..3029f4303 100644 --- a/pkg/provider/ibmcloud/data/pisystempools.go +++ b/pkg/provider/ibmcloud/data/pisystempools.go @@ -110,10 +110,23 @@ func poolHasCapacity(pool *models.SystemPool, cores float64, memory int64) bool } func poolAvailableMemory(pool *models.SystemPool) int64 { + var maxMem int64 + if pool.MaxCoresAvailable != nil && pool.MaxCoresAvailable.Memory != nil { + if *pool.MaxCoresAvailable.Memory > maxMem { + maxMem = *pool.MaxCoresAvailable.Memory + } + } + if pool.MaxMemoryAvailable != nil && pool.MaxMemoryAvailable.Memory != nil { + if *pool.MaxMemoryAvailable.Memory > maxMem { + maxMem = *pool.MaxMemoryAvailable.Memory + } + } if pool.MaxAvailable != nil && pool.MaxAvailable.Memory != nil { - return *pool.MaxAvailable.Memory + if *pool.MaxAvailable.Memory > maxMem { + maxMem = *pool.MaxAvailable.Memory + } } - return 0 + return maxMem } func logPoolSummary(pools models.SystemPools) { From 28e55f77500b1bf4f8ed356f6c68b9d2453b2eac Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Mon, 22 Jun 2026 15:08:25 -0400 Subject: [PATCH 16/20] fix(ibmcloud): remove nonexistent /opt/dotnet from s390x snippet The s390x runner snippet referenced /opt/dotnet in chown and environment setup, but the runner build extracts everything into /opt/runner-cache. This caused cloud-init to fail with "chown: cannot access '/opt/dotnet'" under set -euo pipefail, preventing runner registration even though the binary built successfully. Mirrors the same fix already applied to the ppc64le snippet in de8fc1f. Co-Authored-By: Claude Opus 4.6 --- pkg/integrations/github/snippet-linux-s390x.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/integrations/github/snippet-linux-s390x.sh b/pkg/integrations/github/snippet-linux-s390x.sh index 22bd2982d..634dc0a98 100644 --- a/pkg/integrations/github/snippet-linux-s390x.sh +++ b/pkg/integrations/github/snippet-linux-s390x.sh @@ -15,12 +15,10 @@ if [ ! -f /opt/runner-cache/config.sh ]; then fi id -u runner &>/dev/null || useradd -m -s /bin/bash runner -chown -R runner:runner /opt/runner-cache /opt/dotnet +chown -R runner:runner /opt/runner-cache sudo -u runner bash -c ' cd /opt/runner-cache - export DOTNET_ROOT=/opt/dotnet - export PATH=$PATH:$DOTNET_ROOT ./config.sh \ --unattended \ From 107ab7f081e184dbe103ec8cbdccbc6903df3167 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Mon, 22 Jun 2026 18:05:53 -0400 Subject: [PATCH 17/20] feat(ibmcloud): zone-level system type discovery with retry on capacity --- .../ibmcloud/action/ibm-power/ibm-power.go | 119 ++++++++---- pkg/provider/ibmcloud/data/pisystempools.go | 182 ------------------ pkg/provider/ibmcloud/data/pisystemtypes.go | 112 +++++++++++ 3 files changed, 198 insertions(+), 215 deletions(-) delete mode 100644 pkg/provider/ibmcloud/data/pisystempools.go create mode 100644 pkg/provider/ibmcloud/data/pisystemtypes.go diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index ffe273337..717280dd3 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -120,50 +120,103 @@ func New(ctx *mc.ContextArgs, args *PWArgs) error { prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main") - sysTypeResult, err := icdata.SelectSystemType(mCtx, &icdata.SystemPoolRequirements{ + sysTypes, err := icdata.GetAvailableSystemTypes(mCtx, &icdata.SystemTypeRequirements{ CloudInstanceId: args.WorkspaceID, - Memory: args.Memory, - Processors: args.Processors, + Zone: os.Getenv("IC_ZONE"), + ProcType: args.ProcType, PreferredType: args.SysType, }) if err != nil { - return fmt.Errorf("system type selection failed: %w", err) - } - if !sysTypeResult.IsPreferred { - logging.Warnf("using system type %s instead of requested %s due to capacity constraints", - sysTypeResult.SelectedType, args.SysType) - } - - r := &pwRequest{ - mCtx: mCtx, - prefix: &prefix, - piPrivateSubnetID: args.PIPrivateSubnetID, - workspaceID: args.WorkspaceID, - vpcPublicSubnetID: args.VPCPublicSubnetID, - memory: args.Memory, - processors: args.Processors, - procType: args.ProcType, - sysType: sysTypeResult.SelectedType, - storageType: args.StorageType, - diskSize: args.DiskSize, - otelAppCode: args.OtelAppCode, - otelAuthToken: args.OtelAuthToken, - otelEndpoint: args.OtelEndpoint, - otelIndex: args.OtelIndex, - otelExtraAttrs: args.OtelExtraAttrs, + return fmt.Errorf("system type discovery failed: %w", err) } + + var lastErr error + for i, sysType := range sysTypes.Types { + if i > 0 { + logging.Warnf("retrying with system type %s (%d/%d) after capacity failure", + sysType, i+1, len(sysTypes.Types)) + } + + r := &pwRequest{ + mCtx: mCtx, + prefix: &prefix, + piPrivateSubnetID: args.PIPrivateSubnetID, + workspaceID: args.WorkspaceID, + vpcPublicSubnetID: args.VPCPublicSubnetID, + memory: args.Memory, + processors: args.Processors, + procType: args.ProcType, + sysType: sysType, + storageType: args.StorageType, + diskSize: args.DiskSize, + otelAppCode: args.OtelAppCode, + otelAuthToken: args.OtelAuthToken, + otelEndpoint: args.OtelEndpoint, + otelIndex: args.OtelIndex, + otelExtraAttrs: args.OtelExtraAttrs, + } + cs := manager.Stack{ + StackName: mCtx.StackNameByProject(stackIBMPowerVS), + ProjectName: mCtx.ProjectName(), + BackedURL: mCtx.BackedURL(), + ProviderCredentials: ibmcloudp.DefaultCredentials, + DeployFunc: r.deploy, + } + sr, err := manager.UpStack(r.mCtx, cs) + if err == nil { + if i > 0 { + logging.Infof("provisioning succeeded with system type %s (attempt %d)", sysType, i+1) + } + return manageResults(mCtx, sr, prefix, r.vpcPublicSubnetID != "") + } + + lastErr = err + if !isCapacityError(err) { + return fmt.Errorf("stack creation failed: %w", err) + } + + logging.Warnf("capacity error with system type %s: %v", sysType, err) + + if i < len(sysTypes.Types)-1 { + logging.Infof("destroying partial stack before retry...") + if dErr := destroyForRetry(mCtx); dErr != nil { + logging.Warnf("failed to destroy partial stack: %v", dErr) + } + } + } + + return fmt.Errorf("all system types exhausted; last error: %w", lastErr) +} + +func isCapacityError(err error) bool { + if err == nil { + return false + } + errStr := strings.ToLower(err.Error()) + for _, pattern := range []string{ + "insufficient resources", + "no available host", + "capacity is not available", + "not enough resources", + "resource capacity", + "no hosts available", + "maximum capacity", + } { + if strings.Contains(errStr, pattern) { + return true + } + } + return false +} + +func destroyForRetry(mCtx *mc.Context) error { cs := manager.Stack{ StackName: mCtx.StackNameByProject(stackIBMPowerVS), ProjectName: mCtx.ProjectName(), BackedURL: mCtx.BackedURL(), ProviderCredentials: ibmcloudp.DefaultCredentials, - DeployFunc: r.deploy, - } - sr, err := manager.UpStack(r.mCtx, cs) - if err != nil { - return fmt.Errorf("stack creation failed: %w", err) } - return manageResults(mCtx, sr, prefix, r.vpcPublicSubnetID != "") + return manager.DestroyStack(mCtx, cs) } // Destroy tears down the Power VS stack identified by mCtxArgs. diff --git a/pkg/provider/ibmcloud/data/pisystempools.go b/pkg/provider/ibmcloud/data/pisystempools.go deleted file mode 100644 index 3029f4303..000000000 --- a/pkg/provider/ibmcloud/data/pisystempools.go +++ /dev/null @@ -1,182 +0,0 @@ -package data - -import ( - "fmt" - "math" - "os" - "sort" - "strings" - - v "github.com/IBM-Cloud/power-go-client/clients/instance" - ps "github.com/IBM-Cloud/power-go-client/ibmpisession" - "github.com/IBM-Cloud/power-go-client/power/models" - "github.com/IBM/go-sdk-core/v5/core" - mc "github.com/redhat-developer/mapt/pkg/manager/context" - icConstants "github.com/redhat-developer/mapt/pkg/provider/ibmcloud/constants" - "github.com/redhat-developer/mapt/pkg/util/logging" -) - -type SystemPoolRequirements struct { - CloudInstanceId string - Memory float64 - Processors float64 - PreferredType string -} - -type SystemPoolResult struct { - SelectedType string - IsPreferred bool -} - -func SelectSystemType(mCtx *mc.Context, args *SystemPoolRequirements) (*SystemPoolResult, error) { - client, err := piSystemPoolsClient(mCtx, args.CloudInstanceId) - if err != nil { - return nil, fmt.Errorf("failed to create system pools client: %w", err) - } - pools, err := client.GetSystemPools() - if err != nil { - return nil, fmt.Errorf("failed to query system pools: %w", err) - } - if len(pools) == 0 { - return nil, fmt.Errorf("no system pools available in workspace %s", args.CloudInstanceId) - } - - reqCores := args.Processors - reqMemory := int64(math.Ceil(args.Memory)) - - logPoolSummary(pools) - - if pool, ok := pools[args.PreferredType]; ok { - if poolHasCapacity(&pool, reqCores, reqMemory) { - logging.Infof("system type %s has sufficient capacity (requested: %.1f cores, %d GiB memory)", - args.PreferredType, reqCores, reqMemory) - return &SystemPoolResult{SelectedType: args.PreferredType, IsPreferred: true}, nil - } - logging.Warnf("requested system type %s has insufficient capacity; searching for alternatives...", args.PreferredType) - } else { - logging.Warnf("requested system type %s not found in workspace; searching for alternatives...", args.PreferredType) - } - - type candidate struct { - name string - headroom int64 - } - var candidates []candidate - for name, pool := range pools { - if name == args.PreferredType { - continue - } - if poolHasCapacity(&pool, reqCores, reqMemory) { - candidates = append(candidates, candidate{ - name: name, - headroom: poolAvailableMemory(&pool) - reqMemory, - }) - } - } - - if len(candidates) == 0 { - return nil, fmt.Errorf( - "no system pool has sufficient capacity for %.1f cores and %d GiB memory in workspace %s\n%s", - reqCores, reqMemory, args.CloudInstanceId, poolCapacitySummary(pools)) - } - - sort.Slice(candidates, func(i, j int) bool { - return candidates[i].headroom > candidates[j].headroom - }) - - selected := candidates[0].name - logging.Infof("auto-selected system type %s (requested %s was unavailable)", selected, args.PreferredType) - return &SystemPoolResult{SelectedType: selected, IsPreferred: false}, nil -} - -func poolHasCapacity(pool *models.SystemPool, cores float64, memory int64) bool { - if pool.MaxCoresAvailable != nil { - if pool.MaxCoresAvailable.Cores != nil && *pool.MaxCoresAvailable.Cores >= cores && - pool.MaxCoresAvailable.Memory != nil && *pool.MaxCoresAvailable.Memory >= memory { - return true - } - } - if pool.MaxMemoryAvailable != nil { - if pool.MaxMemoryAvailable.Cores != nil && *pool.MaxMemoryAvailable.Cores >= cores && - pool.MaxMemoryAvailable.Memory != nil && *pool.MaxMemoryAvailable.Memory >= memory { - return true - } - } - if pool.MaxAvailable != nil { - return pool.MaxAvailable.Cores != nil && *pool.MaxAvailable.Cores >= cores && - pool.MaxAvailable.Memory != nil && *pool.MaxAvailable.Memory >= memory - } - return false -} - -func poolAvailableMemory(pool *models.SystemPool) int64 { - var maxMem int64 - if pool.MaxCoresAvailable != nil && pool.MaxCoresAvailable.Memory != nil { - if *pool.MaxCoresAvailable.Memory > maxMem { - maxMem = *pool.MaxCoresAvailable.Memory - } - } - if pool.MaxMemoryAvailable != nil && pool.MaxMemoryAvailable.Memory != nil { - if *pool.MaxMemoryAvailable.Memory > maxMem { - maxMem = *pool.MaxMemoryAvailable.Memory - } - } - if pool.MaxAvailable != nil && pool.MaxAvailable.Memory != nil { - if *pool.MaxAvailable.Memory > maxMem { - maxMem = *pool.MaxAvailable.Memory - } - } - return maxMem -} - -func logPoolSummary(pools models.SystemPools) { - for name, pool := range pools { - cores := float64(0) - mem := int64(0) - if pool.MaxAvailable != nil { - if pool.MaxAvailable.Cores != nil { - cores = *pool.MaxAvailable.Cores - } - if pool.MaxAvailable.Memory != nil { - mem = *pool.MaxAvailable.Memory - } - } - logging.Infof(" system pool %-8s: max available %.1f cores, %d GiB memory", name, cores, mem) - } -} - -func poolCapacitySummary(pools models.SystemPools) string { - var lines []string - for name, pool := range pools { - cores := float64(0) - mem := int64(0) - if pool.MaxAvailable != nil { - if pool.MaxAvailable.Cores != nil { - cores = *pool.MaxAvailable.Cores - } - if pool.MaxAvailable.Memory != nil { - mem = *pool.MaxAvailable.Memory - } - } - lines = append(lines, fmt.Sprintf(" %-8s: %.1f cores, %d GiB memory available", name, cores, mem)) - } - sort.Strings(lines) - return strings.Join(lines, "\n") -} - -func piSystemPoolsClient(mCtx *mc.Context, cloudInstanceId string) (*v.IBMPISystemPoolClient, error) { - options := &ps.IBMPIOptions{ - Authenticator: &core.IamAuthenticator{ - ApiKey: os.Getenv(icConstants.EnvIBMCloudAPIKey), - }, - UserAccount: os.Getenv(icConstants.EnvIBMCloudAccount), - Zone: os.Getenv("IC_ZONE"), - URL: powerURL(os.Getenv("IC_REGION")), - Debug: mCtx.Debug(), - } - session, err := ps.NewIBMPISession(options) - if err != nil { - return nil, err - } - return v.NewIBMPISystemPoolClient(mCtx.Context(), session, cloudInstanceId), nil -} diff --git a/pkg/provider/ibmcloud/data/pisystemtypes.go b/pkg/provider/ibmcloud/data/pisystemtypes.go new file mode 100644 index 000000000..c1dea5f36 --- /dev/null +++ b/pkg/provider/ibmcloud/data/pisystemtypes.go @@ -0,0 +1,112 @@ +package data + +import ( + "fmt" + "os" + "slices" + "strings" + + v "github.com/IBM-Cloud/power-go-client/clients/instance" + ps "github.com/IBM-Cloud/power-go-client/ibmpisession" + "github.com/IBM/go-sdk-core/v5/core" + mc "github.com/redhat-developer/mapt/pkg/manager/context" + icConstants "github.com/redhat-developer/mapt/pkg/provider/ibmcloud/constants" + "github.com/redhat-developer/mapt/pkg/util/logging" +) + +var DefaultSystemTypePriority = []string{"s1022", "s1122", "e1080", "e1050", "e980", "s922"} + +type SystemTypeRequirements struct { + CloudInstanceId string + Zone string + ProcType string + PreferredType string +} + +type SystemTypeResult struct { + Types []string +} + +func GetAvailableSystemTypes(mCtx *mc.Context, args *SystemTypeRequirements) (*SystemTypeResult, error) { + client, err := piDatacentersClient(mCtx, args.CloudInstanceId) + if err != nil { + return nil, fmt.Errorf("failed to create datacenters client: %w", err) + } + dc, err := client.Get(args.Zone) + if err != nil { + return nil, fmt.Errorf("failed to query datacenter %s: %w", args.Zone, err) + } + + var general, dedicated []string + if dc.CapabilitiesDetails != nil && dc.CapabilitiesDetails.SupportedSystems != nil { + general = dc.CapabilitiesDetails.SupportedSystems.General + dedicated = dc.CapabilitiesDetails.SupportedSystems.Dedicated + } + + types, err := filterAndPrioritize(args.PreferredType, args.ProcType, general, dedicated) + if err != nil { + return nil, err + } + + logging.Infof("zone %s supported system types (general=%v, dedicated=%v)", args.Zone, general, dedicated) + logging.Infof("system types to attempt (priority order): %v", types) + + return &SystemTypeResult{Types: types}, nil +} + +func filterAndPrioritize(preferred, procType string, general, dedicated []string) ([]string, error) { + var supported []string + if strings.EqualFold(procType, "dedicated") { + supported = dedicated + } else { + supported = general + } + + priority := buildPriorityList(preferred) + + var filtered []string + for _, t := range priority { + if slices.Contains(supported, t) { + filtered = append(filtered, t) + } + } + + if len(filtered) == 0 { + return nil, fmt.Errorf( + "no system types from priority list %v are supported in zone (supported: %v)", + priority, supported) + } + + return filtered, nil +} + +func buildPriorityList(preferred string) []string { + if preferred == "" { + return DefaultSystemTypePriority + } + + result := []string{preferred} + for _, t := range DefaultSystemTypePriority { + if t != preferred { + result = append(result, t) + } + } + return result +} + +func piDatacentersClient(mCtx *mc.Context, cloudInstanceId string) (*v.IBMPIDatacentersClient, error) { + options := &ps.IBMPIOptions{ + Authenticator: &core.IamAuthenticator{ + ApiKey: os.Getenv(icConstants.EnvIBMCloudAPIKey), + }, + UserAccount: os.Getenv(icConstants.EnvIBMCloudAccount), + Zone: os.Getenv("IC_ZONE"), + URL: powerURL(os.Getenv("IC_REGION")), + Debug: mCtx.Debug(), + } + session, err := ps.NewIBMPISession(options) + if err != nil { + return nil, err + } + return v.NewIBMPIDatacenterClient(mCtx.Context(), session, cloudInstanceId), nil +} From 43b28651995e9ed893cb619a8a39ebebe0ef005a Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Tue, 23 Jun 2026 17:01:02 -0400 Subject: [PATCH 18/20] fix(ibmcloud): harden Power VS provisioning and address PR review --- cmd/mapt/cmd/params/params.go | 5 +++ .../ibmcloud/action/ibm-power/cloud-config | 32 ++++++++++++------- .../ibmcloud/action/ibm-power/ibm-power.go | 2 +- pkg/provider/ibmcloud/data/pisystemtypes.go | 8 ++++- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index 006b339c8..23c8908da 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -321,6 +321,11 @@ func GithubRunnerArgs() *github.GithubRunnerArgs { logging.Errorf("invalid --ghactions-runner-image-repo: %v", err) return nil } + if imageRepo != ghActionsRunnerImageRepoDefault { + logging.Infof("using custom runner image repo: %s", imageRepo) + } else { + logging.Debugf("using temporary fork %s; will switch to IBM upstream once RHEL script is merged", imageRepo) + } } return &github.GithubRunnerArgs{ Token: token, diff --git a/pkg/provider/ibmcloud/action/ibm-power/cloud-config b/pkg/provider/ibmcloud/action/ibm-power/cloud-config index 83982e7bd..36fc00db6 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/cloud-config +++ b/pkg/provider/ibmcloud/action/ibm-power/cloud-config @@ -11,25 +11,33 @@ write_files: mkdir -p /home/containers/storage else DATA_DEV="" - while true; do + ATTEMPTS=0 + MAX_ATTEMPTS=90 + while [ $ATTEMPTS -lt $MAX_ATTEMPTS ]; do DATA_DEV=$(lsblk -rnpo NAME,TYPE | awk '$2=="mpath"{mpath[$1]=1} $2=="part"{p=$1; sub(/p?[0-9]+$/,"",p); has_part[p]=1} END{for(d in mpath) if(!has_part[d]&&d!~/control/) print d}' | head -1) [ -n "$DATA_DEV" ] && break udevadm trigger --subsystem-match=block 2>/dev/null || true udevadm settle 2>/dev/null || true multipathd reconfigure 2>/dev/null || true + ATTEMPTS=$((ATTEMPTS + 1)) sleep 10 done - udevadm settle 2>/dev/null || true - sleep 10 - mkfs.xfs -f -K "$DATA_DEV" - UUID=$(blkid -s UUID -o value "$DATA_DEV") - mkdir -p /mnt/home-tmp - cp -a /home/. /mnt/home-tmp/ - mount "$DATA_DEV" /home - cp -a /mnt/home-tmp/. /home/ - rm -rf /mnt/home-tmp - echo "UUID=$UUID /home xfs defaults 0 2" >> /etc/fstab - mkdir -p /home/containers/storage + if [ -z "$DATA_DEV" ]; then + echo "WARNING: data volume not found after $((MAX_ATTEMPTS * 10))s, using root filesystem" >&2 + mkdir -p /home/containers/storage + else + udevadm settle 2>/dev/null || true + sleep 10 + mkfs.xfs -f -K "$DATA_DEV" + UUID=$(blkid -s UUID -o value "$DATA_DEV") + mkdir -p /mnt/home-tmp + cp -a /home/. /mnt/home-tmp/ + mount "$DATA_DEV" /home + cp -a /mnt/home-tmp/. /home/ + rm -rf /mnt/home-tmp + echo "UUID=$UUID /home xfs defaults 0 2" >> /etc/fstab + mkdir -p /home/containers/storage + fi fi if ! mountpoint -q /var/lib/containers/storage; then mount --bind /home/containers/storage /var/lib/containers/storage diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index 717280dd3..3cba7216a 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -180,7 +180,7 @@ func New(ctx *mc.ContextArgs, args *PWArgs) error { if i < len(sysTypes.Types)-1 { logging.Infof("destroying partial stack before retry...") if dErr := destroyForRetry(mCtx); dErr != nil { - logging.Warnf("failed to destroy partial stack: %v", dErr) + return fmt.Errorf("failed to destroy partial stack before retry: %w", dErr) } } } diff --git a/pkg/provider/ibmcloud/data/pisystemtypes.go b/pkg/provider/ibmcloud/data/pisystemtypes.go index c1dea5f36..b5ccec857 100644 --- a/pkg/provider/ibmcloud/data/pisystemtypes.go +++ b/pkg/provider/ibmcloud/data/pisystemtypes.go @@ -14,7 +14,7 @@ import ( "github.com/redhat-developer/mapt/pkg/util/logging" ) -var DefaultSystemTypePriority = []string{"s1022", "s1122", "e1080", "e1050", "e980", "s922"} +var DefaultSystemTypePriority = []string{"e1080", "e1050", "s1122", "s1022", "e980", "s922"} type SystemTypeRequirements struct { CloudInstanceId string @@ -71,6 +71,12 @@ func filterAndPrioritize(preferred, procType string, general, dedicated []string } } + for _, t := range supported { + if !slices.Contains(filtered, t) { + filtered = append(filtered, t) + } + } + if len(filtered) == 0 { return nil, fmt.Errorf( "no system types from priority list %v are supported in zone (supported: %v)", From 93c91ee8ccd6a0fab16e31fb432a7d791a1c92d7 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Tue, 23 Jun 2026 21:24:11 -0400 Subject: [PATCH 19/20] fix(ibmcloud): default pi-sys-type to auto-discovery and retry on timeout --- cmd/mapt/cmd/params/params.go | 4 ++-- pkg/provider/ibmcloud/action/ibm-power/ibm-power.go | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index 23c8908da..37349d535 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -162,8 +162,8 @@ const ( PIProcTypeDesc string = "PowerVS processor type (shared, dedicated, capped)" PIProcTypeDefault string = "shared" PISysType string = "pi-sys-type" - PISysTypeDesc string = "PowerVS system type (s922, s1022, e880, e980)" - PISysTypeDefault string = "s1022" + PISysTypeDesc string = "preferred PowerVS system type (e.g. e1080, s1022, s1122); if unset, auto-discovered from zone" + PISysTypeDefault string = "" PIStorageType string = "pi-storage-type" PIStorageTypeDesc string = "PowerVS storage tier for instance and data volume (tier1, tier3)" PIStorageTypeDefault string = "tier1" diff --git a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go index 3cba7216a..ffa3b77c4 100644 --- a/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go +++ b/pkg/provider/ibmcloud/action/ibm-power/ibm-power.go @@ -201,6 +201,7 @@ func isCapacityError(err error) bool { "resource capacity", "no hosts available", "maximum capacity", + "context deadline exceeded", } { if strings.Contains(errStr, pattern) { return true From 7c2ee65e94f0c72393ac9ca7722b9a35c95ccd49 Mon Sep 17 00:00:00 2001 From: Dev Kumar Date: Tue, 23 Jun 2026 21:35:13 -0400 Subject: [PATCH 20/20] ci: retrigger after transient quay.io pull failure