diff --git a/td2/alert.go b/td2/alert.go index b3a412b..09bdbb9 100644 --- a/td2/alert.go +++ b/td2/alert.go @@ -246,7 +246,7 @@ func notifySlack(msg *alertMsg) (err error) { } client := &http.Client{} - resp, err := client.Do(req) + resp, err := client.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { return } @@ -312,7 +312,7 @@ func notifyDiscord(msg *alertMsg) (err error) { } req.Header.Set("Content-Type", "application/json") - resp, err := client.Do(req) + resp, err := client.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { l(slog.LevelWarn, "⚠️ Could not notify discord!", err) return err @@ -320,7 +320,7 @@ func notifyDiscord(msg *alertMsg) (err error) { _ = resp.Body.Close() if resp.StatusCode != 204 { - slog.Warn("discord webhook returned non-success response", "status", resp.StatusCode) + slog.Warn("discord webhook returned non-success response", "status", resp.StatusCode) //#nosec G706 -- resp.StatusCode is an integer, not user-controlled string data l(slog.LevelWarn, "⚠️ Could not notify discord! Returned", resp.StatusCode) return err } @@ -497,7 +497,7 @@ func notifyWebhook(msg *alertMsg) (err error) { req.Header.Set("Content-Type", "application/json") client := &http.Client{Timeout: 30 * time.Second} - resp, err := client.Do(req) + resp, err := client.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { l(slog.LevelWarn, "⚠️ Could not send webhook!", err) return err @@ -656,7 +656,7 @@ func evaluateNoRPCEndpointsAlert(cc *ChainConfig, noNodesSec *int) (bool, bool) alert, resolved := false, false alertID := fmt.Sprintf("NoRPCEndpoints_%s", cc.ValAddress) - if cc.noNodes { + if cc.noNodes || cc.noWsNodes { *noNodesSec += 2 if *noNodesSec <= 60*td.NodeDownMin { if *noNodesSec%20 == 0 { @@ -1141,7 +1141,7 @@ func (cc *ChainConfig) watch() { for { if cc.valInfo == nil || cc.valInfo.Moniker == "not connected" { time.Sleep(time.Second) - if boolVal(cc.Alerts.AlertIfNoServers) && cc.noNodes && noNodesSec >= 60*td.NodeDownMin { + if boolVal(cc.Alerts.AlertIfNoServers) && (cc.noNodes || cc.noWsNodes) && noNodesSec >= 60*td.NodeDownMin { alertID := fmt.Sprintf("NoRPCEndpoints_%s", cc.ValAddress) if !alarms.exist(cc.name, alertID) { td.alert( diff --git a/td2/chain-details.go b/td2/chain-details.go index bfd8196..491b545 100644 --- a/td2/chain-details.go +++ b/td2/chain-details.go @@ -426,7 +426,7 @@ func (cc *ChainConfig) getBankMetadataFromCosmosDirectory(denom string) *bank.Me // Denom exponents are typically small (0-18), but we check the full range for safety exponent := uint32(0) if unit.Exponent >= 0 && unit.Exponent <= 255 { - exponent = uint32(unit.Exponent) + exponent = uint32(unit.Exponent) //#nosec G115 -- bounds-checked above (0-255 fits in uint32) } denomUnits[i] = &bank.DenomUnit{ Denom: unit.Denom, diff --git a/td2/encryption.go b/td2/encryption.go index e7cbc49..22e4235 100644 --- a/td2/encryption.go +++ b/td2/encryption.go @@ -227,6 +227,6 @@ func EncryptedConfig(plaintext, ciphertext, pass string, decrypting bool) error if decrypting { fileType = "decrypted" } - slog.Info("wrote file", "bytes", size, "type", fileType, "file", outfile) + slog.Info("wrote file", "bytes", size, "type", fileType, "file", outfile) //#nosec G706 -- outfile is a CLI-provided path, not network-controlled input return nil } diff --git a/td2/init.go b/td2/init.go index cd1405c..736b4e7 100644 --- a/td2/init.go +++ b/td2/init.go @@ -53,7 +53,7 @@ func init() { if len(parts) == 0 { continue } - msgStr := strings.TrimRight(strings.TrimLeft(fmt.Sprint(parts...), "["), "]") + msgStr := strings.TrimSpace(fmt.Sprintln(parts...)) slog.Log(context.Background(), level, "tenderduty | "+msgStr) if td.EnableDash && !td.HideLogs && td.logChan != nil { td.logChan <- dash.LogMessage{ diff --git a/td2/provider-default.go b/td2/provider-default.go index f5aa05a..73cf3e1 100644 --- a/td2/provider-default.go +++ b/td2/provider-default.go @@ -84,7 +84,7 @@ func (d *DefaultProvider) CheckIfValidatorVoted(ctx context.Context, proposalID continue // Try next node } - resp, err := client.Do(req) + resp, err := client.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { lastErr = err continue // Try next node diff --git a/td2/provider-namada.go b/td2/provider-namada.go index c102df8..16f7cf8 100644 --- a/td2/provider-namada.go +++ b/td2/provider-namada.go @@ -55,7 +55,7 @@ func getVotingPeriodProposals(httpClient *http.Client, indexers []string) ([]gov continue // Try next node } - resp, err := httpClient.Do(req) + resp, err := httpClient.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { lastErr = err continue // Try next node @@ -133,7 +133,7 @@ func (d *NamadaProvider) QueryUnvotedOpenProposals(ctx context.Context) ([]gov.P continue // Try next node } - resp, err := httpClient.Do(req) + resp, err := httpClient.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { continue // Try next node } @@ -338,7 +338,7 @@ func (d *NamadaProvider) QueryValidatorSelfDelegationRewardsAndCommission(ctx co continue // Try next node } - resp, err := httpClient.Do(req) + resp, err := httpClient.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { lastErr = err continue // Try next node @@ -397,7 +397,7 @@ func (d *NamadaProvider) QueryValidatorVotingPool(ctx context.Context) (votingPo continue // Try next node } - resp, err := httpClient.Do(req) + resp, err := httpClient.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { lastErr = err continue // Try next node diff --git a/td2/rpc.go b/td2/rpc.go index df89131..0348886 100644 --- a/td2/rpc.go +++ b/td2/rpc.go @@ -340,7 +340,7 @@ func getStatusWithEndpoint(ctx context.Context, u string) (string, bool, error) TLSClientConfig: &tls.Config{InsecureSkipVerify: td.TLSSkipVerify}, } client := &http.Client{Transport: tr} - resp, err := client.Do(req) + resp, err := client.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { return "", false, err } diff --git a/td2/types.go b/td2/types.go index 5ba2f9a..d4188b1 100644 --- a/td2/types.go +++ b/td2/types.go @@ -176,6 +176,7 @@ type ChainConfig struct { wsclient *TmConn // custom websocket client to work around wss:// bugs in tendermint client *rpchttp.HTTP // legit tendermint client noNodes bool // tracks if all nodes are down + noWsNodes bool // tracks if all websocket endpoints are down valInfo *ValInfo // recent validator state, only refreshed every few minutes lastValInfo *ValInfo // use for detecting newly-jailed/tombstone totalBondedTokens float64 // total bonded tokens on the chain @@ -331,7 +332,7 @@ type NodeConfig struct { // PDConfig is the information required to send alerts to PagerDuty type PDConfig struct { Enabled *bool `yaml:"enabled"` - ApiKey string `yaml:"api_key"` + ApiKey string `yaml:"api_key" json:"-"` DefaultSeverity string `yaml:"default_severity"` SeverityThreshold string `yaml:"severity_threshold"` } @@ -347,7 +348,7 @@ type DiscordConfig struct { // TeleConfig holds the information needed to publish to a Telegram webhook for sending alerts type TeleConfig struct { Enabled *bool `yaml:"enabled"` - ApiKey string `yaml:"api_key"` + ApiKey string `yaml:"api_key" json:"-"` Channel string `yaml:"channel"` Mentions []string `yaml:"mentions"` SeverityThreshold string `yaml:"severity_threshold"` diff --git a/td2/utils/price-conversion.go b/td2/utils/price-conversion.go index 6003d46..e7c22ba 100644 --- a/td2/utils/price-conversion.go +++ b/td2/utils/price-conversion.go @@ -149,7 +149,7 @@ func (c *CoinMarketCapClient) fetchPricesFromAPI(ctx context.Context, slugs []st q.Add("convert", currency) req.URL.RawQuery = q.Encode() - resp, err := c.httpClient.Do(req) + resp, err := c.httpClient.Do(req) //#nosec G704 -- URL is from operator-supplied config if err != nil { // Log the error and continue with next slug fmt.Printf("Error fetching data for slug %s: %v\n", slug, err) diff --git a/td2/validator.go b/td2/validator.go index 480c10c..dc8faf1 100644 --- a/td2/validator.go +++ b/td2/validator.go @@ -179,7 +179,7 @@ func (cc *ChainConfig) GetValInfo(first bool) (err error) { } } if first { - l("⚙️ ", cc.ValAddress[:20], "... is using consensus key: ", cc.valInfo.Valcons) + l("⚙️", cc.ValAddress[:20], "... is using consensus key: ", cc.valInfo.Valcons) } } diff --git a/td2/ws.go b/td2/ws.go index dbf46b1..0da99bf 100644 --- a/td2/ws.go +++ b/td2/ws.go @@ -92,13 +92,50 @@ func (cc *ChainConfig) WsRun() { break } - //#nosec G402 -- configurable option - cc.wsclient, err = NewClient(cc.client.Remote(), td.TLSSkipVerify) - if err != nil { - l(slog.LevelError, err) - cancel() + // Collect candidate websocket URLs, prioritising healthy nodes, then falling back to down nodes. + var wsUrls []string + seen := make(map[string]bool) + for _, node := range cc.Nodes { + if !node.down && !seen[node.Url] { + wsUrls = append(wsUrls, node.Url) + seen[node.Url] = true + } + } + // Include the active RPC client's URL in case it is a cosmos.directory fallback not listed in Nodes. + if cc.client != nil { + if remote := cc.client.Remote(); !seen[remote] { + wsUrls = append(wsUrls, remote) + seen[remote] = true + } + } + // Last resort: try nodes that are currently marked as down. + for _, node := range cc.Nodes { + if node.down && !seen[node.Url] { + wsUrls = append(wsUrls, node.Url) + seen[node.Url] = true + } + } + + // Try each candidate URL until a websocket connection succeeds. + var wsURL string + for _, u := range wsUrls { + //#nosec G402 -- configurable option + client, connErr := NewClient(u, td.TLSSkipVerify) + if connErr != nil { + l(slog.LevelWarn, cc.ChainId, fmt.Sprintf("websocket connection failed for %s: %s", u, connErr)) + continue + } + cc.wsclient = client + wsURL = u + break + } + + if cc.wsclient == nil { + l(slog.LevelError, cc.ChainId, "all websocket endpoints failed") + cc.noWsNodes = true return } + cc.noWsNodes = false defer cc.wsclient.Close() err = cc.wsclient.SetCompressionLevel(3) if err != nil { @@ -290,7 +327,7 @@ func (cc *ChainConfig) WsRun() { break } } - l(fmt.Sprintf("⚙️ %-12s watching for NewBlock and Vote events via %s", cc.ChainId, cc.client.Remote())) + l(fmt.Sprintf("⚙️ %-12s watching for NewBlock and Vote events via %s", cc.ChainId, wsURL)) for { select { case <-cc.client.Quit():