Skip to content
Draft
1 change: 1 addition & 0 deletions cli/cmd/bootstrap_gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func AddBootstrapGcpCmd(parent *cobra.Command, opts *GlobalOptions) {
parent.AddCommand(bootstrapGcpCmd.cmd)
AddBootstrapGcpPostconfigCmd(bootstrapGcpCmd.cmd, opts)
AddBootstrapGcpCleanupCmd(bootstrapGcpCmd.cmd, opts)
AddBootstrapGcpRestartVMsCmd(bootstrapGcpCmd.cmd, opts)
}

func (c *BootstrapGcpCmd) BootstrapGcp() error {
Expand Down
135 changes: 135 additions & 0 deletions cli/cmd/bootstrap_gcp_restart_vms.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Copyright (c) Codesphere Inc.
// SPDX-License-Identifier: Apache-2.0

package cmd

import (
"fmt"
"log"
"os"

csio "github.com/codesphere-cloud/cs-go/pkg/io"
"github.com/codesphere-cloud/oms/internal/bootstrap"
"github.com/codesphere-cloud/oms/internal/bootstrap/gcp"
"github.com/codesphere-cloud/oms/internal/util"
"github.com/spf13/cobra"
)

// BootstrapGcpRestartVMsCmd is the "restart-vms" subcommand registered under
// bootstrap-gcp. It pairs the cobra command with the options its flags fill in.
type BootstrapGcpRestartVMsCmd struct {
// cmd is the underlying cobra command; RunE takes its context from here.
cmd *cobra.Command
// Opts holds the values bound to the command's flags.
Opts *BootstrapGcpRestartVMsOpts
}

// BootstrapGcpRestartVMsOpts carries the options of the restart-vms command.
type BootstrapGcpRestartVMsOpts struct {
*GlobalOptions
// ProjectID is bound to --project-id; when empty (together with Zone) the
// value is read from the local infra file.
ProjectID string
// Zone is bound to --zone; when empty (together with ProjectID) the value
// is read from the local infra file.
Zone string
// Name is bound to --name and selects a single VM to restart; when empty
// all VMs are restarted.
Name string
}

// RunE executes the restart-vms command.
//
// The target project and zone come from --project-id and --zone, which must be
// given together. When both are omitted they are read from the local infra
// file. With --name set only that VM is restarted; otherwise every VM known to
// the bootstrapper is restarted.
func (c *BootstrapGcpRestartVMsCmd) RunE(_ *cobra.Command, args []string) error {
	ctx := c.cmd.Context()
	stepLogger := bootstrap.NewStepLogger(false)
	fsWriter := util.NewFilesystemWriter()

	project, zone := c.Opts.ProjectID, c.Opts.Zone
	projectMissing, zoneMissing := project == "", zone == ""

	switch {
	case projectMissing != zoneMissing:
		// Exactly one of the two flags was supplied.
		return fmt.Errorf("--project-id and --zone must be provided together")
	case projectMissing && zoneMissing:
		// Neither flag was supplied: fall back to the infra file.
		infraPath := gcp.GetInfraFilePath()
		infra, found, err := gcp.LoadInfraFile(fsWriter, infraPath)
		if err != nil {
			return fmt.Errorf("failed to load infra file: %w", err)
		}
		if !found {
			return fmt.Errorf("infra file not found at %s; use --project-id and --zone flags", infraPath)
		}
		project, zone = infra.ProjectID, infra.Zone
	}

	// Guard against values that are still empty after resolution.
	switch {
	case project == "":
		return fmt.Errorf("project ID is required; set --project-id and --zone or ensure the infra file exists")
	case zone == "":
		return fmt.Errorf("zone is required; set --project-id and --zone or ensure the infra file exists")
	}

	client := gcp.NewGCPClient(ctx, stepLogger, os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"))

	// Only project and zone are populated in the environment handed to the
	// bootstrapper; the remaining constructor dependencies are passed as nil.
	env := &gcp.CodesphereEnvironment{
		ProjectID: project,
		Zone:      zone,
	}

	bootstrapper, err := gcp.NewGCPBootstrapper(
		ctx,
		nil,
		stepLogger,
		env,
		nil,
		client,
		fsWriter,
		nil,
		nil,
		util.NewTime(),
		nil,
	)
	if err != nil {
		return fmt.Errorf("failed to create bootstrapper: %w", err)
	}

	// Single-VM mode.
	if vmName := c.Opts.Name; vmName != "" {
		log.Printf("Restarting VM %s in project %s (zone %s)...", vmName, project, zone)
		if err := bootstrapper.RestartVM(vmName); err != nil {
			return fmt.Errorf("failed to restart VM: %w", err)
		}
		log.Printf("VM %s restarted successfully.", vmName)
		return nil
	}

	// All-VMs mode.
	log.Printf("Restarting all VMs in project %s (zone %s)...", project, zone)
	if err := bootstrapper.RestartVMs(); err != nil {
		return fmt.Errorf("failed to restart VMs: %w", err)
	}
	log.Printf("All VMs restarted successfully.")
	return nil
}

// AddBootstrapGcpRestartVMsCmd builds the "restart-vms" command, binds its
// --project-id, --zone and --name flags to a fresh options value, and attaches
// the command to the given bootstrap-gcp parent command.
func AddBootstrapGcpRestartVMsCmd(bootstrapGcp *cobra.Command, opts *GlobalOptions) {
	restartOpts := &BootstrapGcpRestartVMsOpts{
		GlobalOptions: opts,
	}

	command := &cobra.Command{
		Use:   "restart-vms",
		Short: "Restart stopped or terminated GCP VMs",
		Long: csio.Long(`Restarts GCP compute instances that were stopped or terminated,
for example after spot VM preemption.
By default, restarts all VMs defined in the infrastructure.
Use --name to restart a single VM.
Project ID and zone are read from the local infra file if available,
or can be specified via flags.`),
		Example: formatExamples("beta bootstrap-gcp restart-vms", []csio.Example{
			{Desc: "Restart all VMs using project info from the local infra file"},
			{Cmd: "--name jumpbox", Desc: "Restart only the jumpbox VM"},
			{Cmd: "--name k0s-1", Desc: "Restart a specific k0s node"},
			{Cmd: "--project-id my-project --zone us-central1-a", Desc: "Restart all VMs with explicit project and zone"},
			{Cmd: "--project-id my-project --zone us-central1-a --name ceph-1", Desc: "Restart a specific VM with explicit project and zone"},
		}),
	}

	restartVMs := BootstrapGcpRestartVMsCmd{
		cmd:  command,
		Opts: restartOpts,
	}

	flagSet := command.Flags()
	flagSet.StringVar(&restartOpts.ProjectID, "project-id", "", "GCP Project ID (optional, will use infra file if not provided)")
	flagSet.StringVar(&restartOpts.Zone, "zone", "", "GCP Zone (optional, will use infra file if not provided)")
	flagSet.StringVar(&restartOpts.Name, "name", "", "Name of a specific VM to restart (e.g. jumpbox, postgres, ceph-1, k0s-1). Restarts all VMs if not specified.")

	command.RunE = restartVMs.RunE
	bootstrapGcp.AddCommand(command)
}
108 changes: 108 additions & 0 deletions cli/cmd/bootstrap_gcp_restart_vms_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) Codesphere Inc.
// SPDX-License-Identifier: Apache-2.0

package cmd_test

import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/spf13/cobra"

"github.com/codesphere-cloud/oms/cli/cmd"
)

// Specs for the restart-vms command: the options struct and the command
// registration (presence of the command, its flags, and flag round-tripping).
var _ = Describe("BootstrapGcpRestartVMsCmd", func() {
	var globalOpts *cmd.GlobalOptions

	// newParent returns a fresh stand-in for the bootstrap-gcp parent command.
	newParent := func() *cobra.Command {
		return &cobra.Command{
			Use: "bootstrap-gcp",
		}
	}

	// registerAndFind registers restart-vms on a fresh parent and resolves it
	// again through the parent, asserting that the lookup succeeds.
	registerAndFind := func() *cobra.Command {
		parentCmd := newParent()
		cmd.AddBootstrapGcpRestartVMsCmd(parentCmd, globalOpts)

		found, _, err := parentCmd.Find([]string{"restart-vms"})
		Expect(err).NotTo(HaveOccurred())
		Expect(found).NotTo(BeNil())
		return found
	}

	BeforeEach(func() {
		globalOpts = &cmd.GlobalOptions{}
	})

	Describe("BootstrapGcpRestartVMsOpts structure", func() {
		Context("when initialized", func() {
			It("should have correct default values", func() {
				opts := &cmd.BootstrapGcpRestartVMsOpts{
					GlobalOptions: globalOpts,
				}
				Expect(opts.ProjectID).To(BeEmpty())
				Expect(opts.Zone).To(BeEmpty())
				Expect(opts.Name).To(BeEmpty())
			})

			It("should store provided values", func() {
				const (
					project = "my-project"
					zone    = "us-central1-a"
					name    = "jumpbox"
				)
				opts := &cmd.BootstrapGcpRestartVMsOpts{
					GlobalOptions: globalOpts,
					ProjectID:     project,
					Zone:          zone,
					Name:          name,
				}
				Expect(opts.ProjectID).To(Equal(project))
				Expect(opts.Zone).To(Equal(zone))
				Expect(opts.Name).To(Equal(name))
			})
		})
	})

	Describe("AddBootstrapGcpRestartVMsCmd", func() {
		Context("when adding command", func() {
			It("should not panic when adding to parent command", func() {
				Expect(func() {
					cmd.AddBootstrapGcpRestartVMsCmd(newParent(), globalOpts)
				}).NotTo(Panic())
			})

			It("should create command with correct flags", func() {
				restartCmd := registerAndFind()
				Expect(restartCmd.Use).To(Equal("restart-vms"))

				for _, flagName := range []string{"project-id", "zone", "name"} {
					Expect(restartCmd.Flags().Lookup(flagName)).NotTo(BeNil())
				}
			})

			It("should bind flag values to opts", func() {
				restartCmd := registerAndFind()

				// Set each flag and read it back, in the same order every run.
				cases := []struct{ flag, value string }{
					{flag: "project-id", value: "flag-project"},
					{flag: "zone", value: "flag-zone"},
					{flag: "name", value: "jumpbox"},
				}
				for _, tc := range cases {
					Expect(restartCmd.Flags().Set(tc.flag, tc.value)).To(Succeed())

					got, err := restartCmd.Flags().GetString(tc.flag)
					Expect(err).NotTo(HaveOccurred())
					Expect(got).To(Equal(tc.value))
				}
			})
		})
	})
})
1 change: 1 addition & 0 deletions docs/oms_beta_bootstrap-gcp.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@ oms beta bootstrap-gcp [flags]
* [oms beta](oms_beta.md) - Commands for early testing
* [oms beta bootstrap-gcp cleanup](oms_beta_bootstrap-gcp_cleanup.md) - Clean up GCP infrastructure created by bootstrap-gcp
* [oms beta bootstrap-gcp postconfig](oms_beta_bootstrap-gcp_postconfig.md) - Run post-configuration steps for GCP bootstrapping
* [oms beta bootstrap-gcp restart-vms](oms_beta_bootstrap-gcp_restart-vms.md) - Restart stopped or terminated GCP VMs

50 changes: 50 additions & 0 deletions docs/oms_beta_bootstrap-gcp_restart-vms.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
## oms beta bootstrap-gcp restart-vms

Restart stopped or terminated GCP VMs

### Synopsis

Restarts GCP compute instances that were stopped or terminated,
for example after spot VM preemption.
By default, restarts all VMs defined in the infrastructure.
Use --name to restart a single VM.
Project ID and zone are read from the local infra file if available,
or can be specified via flags.

```
oms beta bootstrap-gcp restart-vms [flags]
```

### Examples

```
# Restart all VMs using project info from the local infra file
$ oms beta bootstrap-gcp restart-vms

# Restart only the jumpbox VM
$ oms beta bootstrap-gcp restart-vms --name jumpbox

# Restart a specific k0s node
$ oms beta bootstrap-gcp restart-vms --name k0s-1

# Restart all VMs with explicit project and zone
$ oms beta bootstrap-gcp restart-vms --project-id my-project --zone us-central1-a

# Restart a specific VM with explicit project and zone
$ oms beta bootstrap-gcp restart-vms --project-id my-project --zone us-central1-a --name ceph-1

```

### Options

```
-h, --help help for restart-vms
--name string Name of a specific VM to restart (e.g. jumpbox, postgres, ceph-1, k0s-1). Restarts all VMs if not specified.
--project-id string GCP Project ID (optional, will use infra file if not provided)
--zone string GCP Zone (optional, will use infra file if not provided)
```

### SEE ALSO

* [oms beta bootstrap-gcp](oms_beta_bootstrap-gcp.md) - Bootstrap GCP infrastructure for Codesphere

77 changes: 77 additions & 0 deletions internal/bootstrap/gcp/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,83 @@ func (b *GCPBootstrapper) waitForInstanceRunning(projectID, zone, name string, n
name, pollInterval*time.Duration(maxAttempts))
}

// findVMDef searches vmDefs for an entry whose Name equals name.
// It returns the matching definition and true, or a zero VMDef and false when
// no entry matches.
func findVMDef(name string) (VMDef, bool) {
	for i := range vmDefs {
		if candidate := vmDefs[i]; candidate.Name == name {
			return candidate, true
		}
	}
	return VMDef{}, false
}

// validVMNames collects the Name of every entry in vmDefs, in vmDefs order.
func validVMNames() []string {
	names := make([]string, 0, len(vmDefs))
	for _, def := range vmDefs {
		names = append(names, def.Name)
	}
	return names
}

// RestartVM brings a single VM, identified by a name defined in vmDefs, back
// to the running state.
//
// An instance that is already RUNNING is left untouched. A TERMINATED or
// STOPPED instance is started and then waited on until it is running. A
// SUSPENDED instance, or one in any other state, is reported as an error, as
// is an unknown name or an instance that does not exist in the configured
// project and zone.
func (b *GCPBootstrapper) RestartVM(name string) error {
	def, known := findVMDef(name)
	if !known {
		return fmt.Errorf("unknown VM name %q; valid names are: %s", name, strings.Join(validVMNames(), ", "))
	}

	project, zone := b.Env.ProjectID, b.Env.Zone

	instance, err := b.GCPClient.GetInstance(project, zone, name)
	switch {
	case err == nil:
		// Instance found; continue below.
	case IsNotFoundError(err):
		return fmt.Errorf("instance %s does not exist in project %s / zone %s; did you run bootstrap first?", name, project, zone)
	default:
		return fmt.Errorf("failed to get instance %s: %w", name, err)
	}

	status := instance.GetStatus()
	if status == "RUNNING" {
		b.stlog.Logf("Instance %s is already running", name)
		return nil
	}
	if status == "SUSPENDED" {
		return fmt.Errorf("instance %s is SUSPENDED; manual resume is required", name)
	}
	if status != "TERMINATED" && status != "STOPPED" {
		return fmt.Errorf("instance %s is in unexpected state %q", name, status)
	}

	// Only TERMINATED / STOPPED instances reach this point.
	b.stlog.Logf("Starting stopped instance %s...", name)
	if err := b.GCPClient.StartInstance(project, zone, name); err != nil {
		return fmt.Errorf("failed to start instance %s: %w", name, err)
	}

	// NOTE(review): def.ExternalIP presumably tells the wait whether an
	// external IP is expected for this VM; confirm against waitForInstanceRunning.
	running, err := b.waitForInstanceRunning(project, zone, name, def.ExternalIP)
	if err != nil {
		return fmt.Errorf("instance %s did not become ready: %w", name, err)
	}

	internal, external := ExtractInstanceIPs(running)
	b.stlog.Logf("Instance %s is now running (internal=%s, external=%s)", name, internal, external)
	return nil
}

// RestartVMs calls RestartVM for every entry in vmDefs, one after another.
// A failure on one VM does not stop the remaining ones; all failures are
// joined into a single returned error.
// VMs are restarted sequentially because StepLogger is not thread-safe.
func (b *GCPBootstrapper) RestartVMs() error {
	var failures []error
	for i := range vmDefs {
		err := b.RestartVM(vmDefs[i].Name)
		if err == nil {
			continue
		}
		failures = append(failures, err)
	}

	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("errors restarting VMs: %w", errors.Join(failures...))
}

// ReadSSHKey reads an SSH key file, expanding ~ in the path
func (b *GCPBootstrapper) ReadSSHKey(path string) (string, error) {
realPath := util.ExpandPath(path)
Expand Down
Loading
Loading