Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -1163,15 +1163,15 @@ License URL: https://github.com/gomodules/jsonpatch/blob/v2.5.0/v2/LICENSE

----------
Module: google.golang.org/api
Version: v0.273.0
Version: v0.273.1
License: BSD-3-Clause
License URL: https://github.com/googleapis/google-api-go-client/blob/v0.273.0/LICENSE
License URL: https://github.com/googleapis/google-api-go-client/blob/v0.273.1/LICENSE

----------
Module: google.golang.org/api/internal/third_party/uritemplates
Version: v0.273.0
Version: v0.273.1
License: BSD-3-Clause
License URL: https://github.com/googleapis/google-api-go-client/blob/v0.273.0/internal/third_party/uritemplates/LICENSE
License URL: https://github.com/googleapis/google-api-go-client/blob/v0.273.1/internal/third_party/uritemplates/LICENSE

----------
Module: google.golang.org/genproto/googleapis
Expand All @@ -1193,9 +1193,9 @@ License URL: https://github.com/googleapis/go-genproto/blob/d00831a3d3e7/googlea

----------
Module: google.golang.org/grpc
Version: v1.79.3
Version: v1.80.0
License: Apache-2.0
License URL: https://github.com/grpc/grpc-go/blob/v1.79.3/LICENSE
License URL: https://github.com/grpc/grpc-go/blob/v1.80.0/LICENSE

----------
Module: google.golang.org/protobuf
Expand Down
1 change: 1 addition & 0 deletions cli/cmd/bootstrap_gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func AddBootstrapGcpCmd(parent *cobra.Command, opts *GlobalOptions) {
parent.AddCommand(bootstrapGcpCmd.cmd)
AddBootstrapGcpPostconfigCmd(bootstrapGcpCmd.cmd, opts)
AddBootstrapGcpCleanupCmd(bootstrapGcpCmd.cmd, opts)
AddBootstrapGcpRestartVMsCmd(bootstrapGcpCmd.cmd, opts)
}

func (c *BootstrapGcpCmd) BootstrapGcp() error {
Expand Down
134 changes: 134 additions & 0 deletions cli/cmd/bootstrap_gcp_restart_vms.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright (c) Codesphere Inc.
// SPDX-License-Identifier: Apache-2.0

package cmd

import (
"fmt"
"log"
"os"

csio "github.com/codesphere-cloud/cs-go/pkg/io"
"github.com/codesphere-cloud/oms/internal/bootstrap"
"github.com/codesphere-cloud/oms/internal/bootstrap/gcp"
"github.com/codesphere-cloud/oms/internal/util"
"github.com/spf13/cobra"
)

// BootstrapGcpRestartVMsCmd bundles the "restart-vms" cobra command with the
// options its flags are bound to.
type BootstrapGcpRestartVMsCmd struct {
// cmd is the cobra command; RunE reads its context.
cmd *cobra.Command
// Opts holds the flag values for this command.
Opts *BootstrapGcpRestartVMsOpts
}

// BootstrapGcpRestartVMsOpts holds the options of the "restart-vms" command.
type BootstrapGcpRestartVMsOpts struct {
// GlobalOptions are the options shared with the parent command.
*GlobalOptions
// ProjectID is bound to --project-id; when empty, RunE falls back to the infra file.
ProjectID string
// Zone is bound to --zone; when empty, RunE falls back to the infra file.
Zone string
// Name is bound to --name; when empty, all VMs are restarted.
Name string
}

func (c *BootstrapGcpRestartVMsCmd) RunE(_ *cobra.Command, args []string) error {
ctx := c.cmd.Context()
stlog := bootstrap.NewStepLogger(false)
fw := util.NewFilesystemWriter()

projectID := c.Opts.ProjectID
zone := c.Opts.Zone

if projectID == "" || zone == "" {
infraFilePath := gcp.GetInfraFilePath()
infraEnv, exists, err := gcp.LoadInfraFile(fw, infraFilePath)
if err != nil {
return fmt.Errorf("failed to load infra file: %w", err)
}
if !exists {
return fmt.Errorf("infra file not found at %s; use --project-id and --zone flags", infraFilePath)
}
if projectID == "" {
projectID = infraEnv.ProjectID
}
if zone == "" {
zone = infraEnv.Zone
}
}
Comment thread
OliverTrautvetter marked this conversation as resolved.
Outdated

if projectID == "" {
return fmt.Errorf("project ID is required; set --project-id or ensure the infra file exists")
}
if zone == "" {
return fmt.Errorf("zone is required; set --zone or ensure the infra file exists")
}
Comment thread
OliverTrautvetter marked this conversation as resolved.
Outdated

gcpClient := gcp.NewGCPClient(ctx, stlog, os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"))

csEnv := &gcp.CodesphereEnvironment{
ProjectID: projectID,
Zone: zone,
}

bs, err := gcp.NewGCPBootstrapper(
ctx,
nil,
stlog,
csEnv,
nil,
gcpClient,
fw,
nil,
nil,
util.NewTime(),
nil,
)
if err != nil {
return fmt.Errorf("failed to create bootstrapper: %w", err)
}

if c.Opts.Name != "" {
log.Printf("Restarting VM %s in project %s (zone %s)...", c.Opts.Name, projectID, zone)
if err := bs.RestartVM(c.Opts.Name); err != nil {
return fmt.Errorf("failed to restart VM: %w", err)
}
log.Printf("VM %s restarted successfully.", c.Opts.Name)
} else {
log.Printf("Restarting all VMs in project %s (zone %s)...", projectID, zone)
if err := bs.RestartVMs(); err != nil {
return fmt.Errorf("failed to restart VMs: %w", err)
}
log.Printf("All VMs restarted successfully.")
}

return nil
}

// AddBootstrapGcpRestartVMsCmd registers the "restart-vms" subcommand on the
// given bootstrap-gcp command and binds its --project-id, --zone and --name
// flags to a fresh BootstrapGcpRestartVMsOpts that embeds opts.
func AddBootstrapGcpRestartVMsCmd(bootstrapGcp *cobra.Command, opts *GlobalOptions) {
	restartVMs := BootstrapGcpRestartVMsCmd{
		cmd: &cobra.Command{
			Use:   "restart-vms",
			Short: "Restart stopped or terminated GCP VMs",
			// The continuation lines of this raw string stay flush-left so
			// that no source indentation ends up in the help text.
			Long: csio.Long(`Restarts GCP compute instances that were stopped or terminated,
for example after spot VM preemption.
By default, restarts all VMs defined in the infrastructure.
Use --name to restart a single VM.
Project ID and zone are read from the local infra file if available,
or can be specified via flags.`),
			Example: formatExamples("beta bootstrap-gcp restart-vms", []csio.Example{
				{Desc: "Restart all VMs using project info from the local infra file"},
				{Cmd: "--name jumpbox", Desc: "Restart only the jumpbox VM"},
				{Cmd: "--name k0s-1", Desc: "Restart a specific k0s node"},
				{Cmd: "--project-id my-project --zone us-central1-a", Desc: "Restart all VMs with explicit project and zone"},
				{Cmd: "--project-id my-project --zone us-central1-a --name ceph-1", Desc: "Restart a specific VM with explicit project and zone"},
			}),
		},
		Opts: &BootstrapGcpRestartVMsOpts{
			GlobalOptions: opts,
		},
	}

	flags := restartVMs.cmd.Flags()
	flags.StringVar(&restartVMs.Opts.ProjectID, "project-id", "", "GCP Project ID (optional, will use infra file if not provided)")
	flags.StringVar(&restartVMs.Opts.Zone, "zone", "", "GCP Zone (optional, will use infra file if not provided)")
	flags.StringVar(&restartVMs.Opts.Name, "name", "", "Name of a specific VM to restart (e.g. jumpbox, postgres, ceph-1, k0s-1). Restarts all VMs if not specified.")

	restartVMs.cmd.RunE = restartVMs.RunE
	bootstrapGcp.AddCommand(restartVMs.cmd)
}
1 change: 1 addition & 0 deletions docs/oms_beta_bootstrap-gcp.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@ oms beta bootstrap-gcp [flags]
* [oms beta](oms_beta.md) - Commands for early testing
* [oms beta bootstrap-gcp cleanup](oms_beta_bootstrap-gcp_cleanup.md) - Clean up GCP infrastructure created by bootstrap-gcp
* [oms beta bootstrap-gcp postconfig](oms_beta_bootstrap-gcp_postconfig.md) - Run post-configuration steps for GCP bootstrapping
* [oms beta bootstrap-gcp restart-vms](oms_beta_bootstrap-gcp_restart-vms.md) - Restart stopped or terminated GCP VMs

50 changes: 50 additions & 0 deletions docs/oms_beta_bootstrap-gcp_restart-vms.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
## oms beta bootstrap-gcp restart-vms

Restart stopped or terminated GCP VMs

### Synopsis

Restarts GCP compute instances that were stopped or terminated,
for example after spot VM preemption.
By default, restarts all VMs defined in the infrastructure.
Use --name to restart a single VM.
Project ID and zone are read from the local infra file if available,
or can be specified via flags.

```
oms beta bootstrap-gcp restart-vms [flags]
```

### Examples

```
# Restart all VMs using project info from the local infra file
$ oms beta bootstrap-gcp restart-vms

# Restart only the jumpbox VM
$ oms beta bootstrap-gcp restart-vms --name jumpbox

# Restart a specific k0s node
$ oms beta bootstrap-gcp restart-vms --name k0s-1

# Restart all VMs with explicit project and zone
$ oms beta bootstrap-gcp restart-vms --project-id my-project --zone us-central1-a

# Restart a specific VM with explicit project and zone
$ oms beta bootstrap-gcp restart-vms --project-id my-project --zone us-central1-a --name ceph-1

```

### Options

```
-h, --help help for restart-vms
--name string Name of a specific VM to restart (e.g. jumpbox, postgres, ceph-1, k0s-1). Restarts all VMs if not specified.
--project-id string GCP Project ID (optional, will use infra file if not provided)
--zone string GCP Zone (optional, will use infra file if not provided)
```

### SEE ALSO

* [oms beta bootstrap-gcp](oms_beta_bootstrap-gcp.md) - Bootstrap GCP infrastructure for Codesphere

86 changes: 86 additions & 0 deletions internal/bootstrap/gcp/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,92 @@ func (b *GCPBootstrapper) waitForInstanceRunning(projectID, zone, name string, n
name, pollInterval*time.Duration(maxAttempts))
}

// findVMDef searches vmDefs for the first entry whose Name equals name.
// It returns that definition and true, or the zero VMDef and false when no
// entry matches.
func findVMDef(name string) (VMDef, bool) {
	for i := range vmDefs {
		if def := vmDefs[i]; def.Name == name {
			return def, true
		}
	}
	return VMDef{}, false
}

// validVMNames collects the Name of every entry in vmDefs, in vmDefs order.
func validVMNames() []string {
	names := make([]string, 0, len(vmDefs))
	for _, def := range vmDefs {
		names = append(names, def.Name)
	}
	return names
}

// RestartVM restarts a single stopped or terminated VM by a name that is defined in vmDefs.
func (b *GCPBootstrapper) RestartVM(name string) error {
vm, found := findVMDef(name)
if !found {
return fmt.Errorf("unknown VM name %q; valid names are: %s", name, strings.Join(validVMNames(), ", "))
}
Comment thread
OliverTrautvetter marked this conversation as resolved.
Outdated

projectID := b.Env.ProjectID
zone := b.Env.Zone

inst, err := b.GCPClient.GetInstance(projectID, zone, name)
if err != nil {
return fmt.Errorf("failed to get instance %s: %w", name, err)
}
Comment thread
OliverTrautvetter marked this conversation as resolved.

switch s := inst.GetStatus(); s {
case "RUNNING":
b.stlog.Logf("Instance %s is already running", name)
return nil
case "TERMINATED", "STOPPED":
b.stlog.Logf("Starting stopped instance %s...", name)
if err := b.GCPClient.StartInstance(projectID, zone, name); err != nil {
return fmt.Errorf("failed to start instance %s: %w", name, err)
}
case "SUSPENDED":
return fmt.Errorf("instance %s is SUSPENDED; manual resume is required", name)
default:
return fmt.Errorf("instance %s is in unexpected state %q", name, s)
}

readyInstance, err := b.waitForInstanceRunning(projectID, zone, name, vm.ExternalIP)
if err != nil {
return fmt.Errorf("instance %s did not become ready: %w", name, err)
}

internalIP, externalIP := ExtractInstanceIPs(readyInstance)
b.stlog.Logf("Instance %s is now running (internal=%s, external=%s)", name, internalIP, externalIP)
return nil
}

// RestartVMs restarts all stopped or terminated VMs defined in vmDefs.
func (b *GCPBootstrapper) RestartVMs() error {
wg := sync.WaitGroup{}
errCh := make(chan error, len(vmDefs))

for _, vm := range vmDefs {
wg.Add(1)
go func(vm VMDef) {
defer wg.Done()
if err := b.RestartVM(vm.Name); err != nil {
errCh <- err
}
}(vm)
}
wg.Wait()
close(errCh)

var errs []error
for err := range errCh {
errs = append(errs, err)
}
Comment thread
OliverTrautvetter marked this conversation as resolved.
Outdated
if len(errs) > 0 {
return fmt.Errorf("errors restarting VMs: %w", errors.Join(errs...))
}
return nil
}

// ReadSSHKey reads an SSH key file, expanding ~ in the path
func (b *GCPBootstrapper) ReadSSHKey(path string) (string, error) {
realPath := util.ExpandPath(path)
Expand Down
Loading
Loading