diff --git a/README.md b/README.md index ec5adbf7ac..6f5ce1c30d 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,7 @@ logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/So meminfo\_numa | Exposes memory statistics from `/sys/devices/system/node/node[0-9]*/meminfo`, `/sys/devices/system/node/node[0-9]*/numastat`. | Linux mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux network_route | Exposes the routing table as metrics | Linux +nvmesubsystem | Exposes NVMe-oF subsystem path health from `/sys/class/nvme-subsystem/`. | Linux pcidevice | Exposes pci devices' information including their link status and parent devices. | Linux perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux processes | Exposes aggregate process statistics from `/proc`. | Linux @@ -339,6 +340,25 @@ echo 'role{role="application_server"} 1' > /path/to/directory/role.prom.$$ mv /path/to/directory/role.prom.$$ /path/to/directory/role.prom ``` +### NVMe Subsystem Collector + +The `nvmesubsystem` collector exposes NVMe-oF (NVMe over Fabrics) subsystem +path health by reading `/sys/class/nvme-subsystem/`. It complements the +existing `nvme` collector (which reports per-controller hardware stats) by +monitoring the **connectivity layer** — how many controller paths are live, +connecting, or dead for each NVMe subsystem. + +Enable it with `--collector.nvmesubsystem`. + +#### Exposed metrics + +| Metric | Description | +|--------|-------------| +| `node_nvmesubsystem_info` | Info metric with subsystem NQN, model, serial and I/O policy as labels. | +| `node_nvmesubsystem_paths_total` | Total number of controller paths for the subsystem. | +| `node_nvmesubsystem_paths_live` | Number of controller paths currently in `live` state. | +| `node_nvmesubsystem_path_state` | Per-controller path state (1 for the current state, 0 for others). | + ### Filtering enabled collectors The `node_exporter` will expose all metrics from enabled collectors by default. This is the recommended way to collect metrics to avoid errors when comparing metrics of different families. diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index f8a453fe37..930494005f 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -2255,6 +2255,104 @@ Lines: 1 4096 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme-subsystem +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme-subsystem/nvme-subsys0 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/iopolicy +Lines: 1 +round-robinEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/model +Lines: 1 +Dell PowerStoreEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme-subsystem/nvme-subsys0/nvme0 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme0/address +Lines: 1 +nn-0x200000109b123456:pn-0x100000109b123456EOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme0/state +Lines: 1 +liveEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme0/transport +Lines: 1 +fcEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme-subsystem/nvme-subsys0/nvme1 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme1/address +Lines: 1 +nn-0x200000109b123457:pn-0x100000109b123457EOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme1/state +Lines: 1 +liveEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme1/transport +Lines: 1 +fcEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme-subsystem/nvme-subsys0/nvme2 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme2/address +Lines: 1 +nn-0x200000109b123458:pn-0x100000109b123458EOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme2/state +Lines: 1 +liveEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme2/transport +Lines: 1 +fcEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/nvme-subsystem/nvme-subsys0/nvme3 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme3/address +Lines: 1 +nn-0x200000109b123459:pn-0x100000109b123459EOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme3/state +Lines: 1 +deadEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/nvme3/transport +Lines: 1 +fcEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/serial +Lines: 1 +SN12345678EOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme-subsystem/nvme-subsys0/subsysnqn +Lines: 1 +nqn.2014-08.org.nvmexpress:uuid:a34c4f3a-0d6f-5cec-dead-beefcafebabeEOF +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/power_supply Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/collector/nvmesubsystem_linux.go b/collector/nvmesubsystem_linux.go new file mode 100644 index 0000000000..77d5f5fafe --- /dev/null +++ b/collector/nvmesubsystem_linux.go @@ -0,0 +1,209 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !nonvmesubsystem + +package collector + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/prometheus/client_golang/prometheus" +) + +type nvmeSubsystemCollector struct { + logger *slog.Logger + scanSubsystems func() ([]nvmeSubsystem, error) + + subsystemInfo *prometheus.Desc + subsystemPathsTotal *prometheus.Desc + subsystemPathsLive *prometheus.Desc + pathState *prometheus.Desc +} + +type nvmeSubsystem struct { + Name string + NQN string + Model string + Serial string + IOPolicy string + Controllers []nvmeController +} + +type nvmeController struct { + Name string + State string + Transport string + Address string +} + +var ( + nvmeControllerRE = regexp.MustCompile(`^nvme\d+$`) + + nvmeControllerStates = []string{ + "live", "connecting", "resetting", "dead", "unknown", + } +) + +func normalizeControllerState(raw string) string { + switch raw { + case "live", "connecting", "resetting", "dead": + return raw + case "deleting", "deleting (no IO)", "new": + return raw + default: + return "unknown" + } +} + +func init() { + registerCollector("nvmesubsystem", defaultDisabled, NewNVMeSubsystemCollector) +} + +// NewNVMeSubsystemCollector returns a new Collector exposing NVMe-oF subsystem +// path health from /sys/class/nvme-subsystem/. +func NewNVMeSubsystemCollector(logger *slog.Logger) (Collector, error) { + const subsystem = "nvmesubsystem" + + c := &nvmeSubsystemCollector{ + logger: logger, + subsystemInfo: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "info"), + "Non-numeric information about an NVMe subsystem.", + []string{"subsystem", "nqn", "model", "serial", "iopolicy"}, nil, + ), + subsystemPathsTotal: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "paths_total"), + "Total number of controller paths for an NVMe subsystem.", + []string{"subsystem"}, nil, + ), + subsystemPathsLive: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "paths_live"), + "Number of controller paths in live state for an NVMe subsystem.", + []string{"subsystem"}, nil, + ), + pathState: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "path_state"), + "Current NVMe controller path state (1 for the current state, 0 for all others).", + []string{"subsystem", "controller", "transport", "state"}, nil, + ), + } + + c.scanSubsystems = func() ([]nvmeSubsystem, error) { + return scanNVMeSubsystems(*sysPath) + } + + return c, nil +} + +func (c *nvmeSubsystemCollector) Update(ch chan<- prometheus.Metric) error { + subsystems, err := c.scanSubsystems() + if err != nil { + return fmt.Errorf("failed to scan NVMe subsystems: %w", err) + } + + for _, subsys := range subsystems { + ch <- prometheus.MustNewConstMetric(c.subsystemInfo, prometheus.GaugeValue, 1, + subsys.Name, subsys.NQN, subsys.Model, subsys.Serial, subsys.IOPolicy) + + total := float64(len(subsys.Controllers)) + var live float64 + for _, ctrl := range subsys.Controllers { + state := normalizeControllerState(ctrl.State) + if state == "live" { + live++ + } + + for _, s := range nvmeControllerStates { + val := 0.0 + if s == state { + val = 1.0 + } + ch <- prometheus.MustNewConstMetric(c.pathState, prometheus.GaugeValue, val, + subsys.Name, ctrl.Name, ctrl.Transport, s) + } + } + + ch <- prometheus.MustNewConstMetric(c.subsystemPathsTotal, prometheus.GaugeValue, total, subsys.Name) + ch <- prometheus.MustNewConstMetric(c.subsystemPathsLive, prometheus.GaugeValue, live, subsys.Name) + } + + return nil +} + +func scanNVMeSubsystems(sysfsBase string) ([]nvmeSubsystem, error) { + subsysBase := filepath.Join(sysfsBase, "class", "nvme-subsystem") + + entries, err := os.ReadDir(subsysBase) + if err != nil { + return nil, err + } + + var subsystems []nvmeSubsystem + for _, entry := range entries { + if !strings.HasPrefix(entry.Name(), "nvme-subsys") { + continue + } + subsysPath := filepath.Join(subsysBase, entry.Name()) + subsys, err := parseNVMeSubsystem(entry.Name(), subsysPath) + if err != nil { + continue + } + subsystems = append(subsystems, *subsys) + } + + return subsystems, nil +} + +func parseNVMeSubsystem(name, path string) (*nvmeSubsystem, error) { + subsys := &nvmeSubsystem{Name: name} + + subsys.NQN = readSysfsString(filepath.Join(path, "subsysnqn")) + subsys.Model = readSysfsString(filepath.Join(path, "model")) + subsys.Serial = readSysfsString(filepath.Join(path, "serial")) + subsys.IOPolicy = readSysfsString(filepath.Join(path, "iopolicy")) + + entries, err := os.ReadDir(path) + if err != nil { + return subsys, nil + } + + for _, entry := range entries { + if !nvmeControllerRE.MatchString(entry.Name()) { + continue + } + ctrlPath := filepath.Join(path, entry.Name()) + ctrl := nvmeController{ + Name: entry.Name(), + State: readSysfsString(filepath.Join(ctrlPath, "state")), + Transport: readSysfsString(filepath.Join(ctrlPath, "transport")), + Address: readSysfsString(filepath.Join(ctrlPath, "address")), + } + subsys.Controllers = append(subsys.Controllers, ctrl) + } + + return subsys, nil +} + +func readSysfsString(path string) string { + data, err := os.ReadFile(path) + if err != nil { + return "" + } + return strings.TrimSpace(string(data)) +} diff --git a/collector/nvmesubsystem_linux_test.go b/collector/nvmesubsystem_linux_test.go new file mode 100644 index 0000000000..c296de3687 --- /dev/null +++ b/collector/nvmesubsystem_linux_test.go @@ -0,0 +1,188 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !nonvmesubsystem + +package collector + +import ( + "fmt" + "io" + "log/slog" + "path/filepath" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +func TestNVMeSubsystemScan(t *testing.T) { + subsystems, err := scanNVMeSubsystems(filepath.Join("fixtures", "sys")) + if err != nil { + t.Fatal(err) + } + + if len(subsystems) != 1 { + t.Fatalf("expected 1 subsystem, got %d", len(subsystems)) + } + + s := subsystems[0] + if s.Name != "nvme-subsys0" { + t.Errorf("expected nvme-subsys0, got %s", s.Name) + } + if s.NQN != "nqn.2014-08.org.nvmexpress:uuid:a34c4f3a-0d6f-5cec-dead-beefcafebabe" { + t.Errorf("unexpected nqn: %s", s.NQN) + } + if s.Model != "Dell PowerStore" { + t.Errorf("expected Dell PowerStore, got %s", s.Model) + } + if s.IOPolicy != "round-robin" { + t.Errorf("expected round-robin, got %s", s.IOPolicy) + } + if len(s.Controllers) != 4 { + t.Fatalf("expected 4 controllers, got %d", len(s.Controllers)) + } + + liveCount := 0 + for _, c := range s.Controllers { + if c.State == "live" { + liveCount++ + } + if c.Transport != "fc" { + t.Errorf("expected transport fc, got %s for %s", c.Transport, c.Name) + } + } + if liveCount != 3 { + t.Errorf("expected 3 live controllers, got %d", liveCount) + } +} + +func TestNVMeSubsystemMetrics(t *testing.T) { + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + coll, err := NewNVMeSubsystemCollector(logger) + if err != nil { + t.Fatal(err) + } + + c := coll.(*nvmeSubsystemCollector) + c.scanSubsystems = func() ([]nvmeSubsystem, error) { + return scanNVMeSubsystems(filepath.Join("fixtures", "sys")) + } + + ch := make(chan prometheus.Metric, 200) + if err := c.Update(ch); err != nil { + t.Fatal(err) + } + close(ch) + + metrics := make(map[string][]*dto.Metric) + for m := range ch { + d := &dto.Metric{} + if err := m.Write(d); err != nil { + t.Fatal(err) + } + desc := m.Desc().String() + metrics[desc] = append(metrics[desc], d) + } + + assertGaugeValue(t, metrics, "paths_total", labelMap{"subsystem": "nvme-subsys0"}, 4) + assertGaugeValue(t, metrics, "paths_live", labelMap{"subsystem": "nvme-subsys0"}, 3) + + assertGaugeValue(t, metrics, "path_state", + labelMap{"subsystem": "nvme-subsys0", "controller": "nvme0", "transport": "fc", "state": "live"}, 1) + assertGaugeValue(t, metrics, "path_state", + labelMap{"subsystem": "nvme-subsys0", "controller": "nvme0", "transport": "fc", "state": "dead"}, 0) + assertGaugeValue(t, metrics, "path_state", + labelMap{"subsystem": "nvme-subsys0", "controller": "nvme3", "transport": "fc", "state": "dead"}, 1) + assertGaugeValue(t, metrics, "path_state", + labelMap{"subsystem": "nvme-subsys0", "controller": "nvme3", "transport": "fc", "state": "live"}, 0) +} + +func TestNVMeSubsystemNotPresent(t *testing.T) { + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + coll, err := NewNVMeSubsystemCollector(logger) + if err != nil { + t.Fatal(err) + } + + c := coll.(*nvmeSubsystemCollector) + c.scanSubsystems = func() ([]nvmeSubsystem, error) { + return nil, fmt.Errorf("no nvme-subsystem directory") + } + + ch := make(chan prometheus.Metric, 200) + err = c.Update(ch) + close(ch) + + if err == nil { + t.Fatal("expected error when NVMe subsystem sysfs is not available") + } +} + +func TestNormalizeControllerState(t *testing.T) { + tests := []struct { + raw string + expected string + }{ + {"live", "live"}, + {"connecting", "connecting"}, + {"resetting", "resetting"}, + {"dead", "dead"}, + {"deleting", "deleting"}, + {"deleting (no IO)", "deleting (no IO)"}, + {"new", "new"}, + {"", "unknown"}, + {"something-else", "unknown"}, + } + for _, tc := range tests { + got := normalizeControllerState(tc.raw) + if got != tc.expected { + t.Errorf("normalizeControllerState(%q) = %q, want %q", tc.raw, got, tc.expected) + } + } +} + +type labelMap map[string]string + +func assertGaugeValue(t *testing.T, metrics map[string][]*dto.Metric, metricSubstring string, labels labelMap, expected float64) { + t.Helper() + for desc, ms := range metrics { + if !strings.Contains(desc, metricSubstring) { + continue + } + for _, m := range ms { + if matchLabels(m.GetLabel(), labels) { + got := m.GetGauge().GetValue() + if got != expected { + t.Errorf("%s%v: got %v, want %v", metricSubstring, labels, got, expected) + } + return + } + } + } + t.Errorf("metric %s%v not found", metricSubstring, labels) +} + +func matchLabels(pairs []*dto.LabelPair, want labelMap) bool { + if want == nil { + return len(pairs) == 0 + } + found := 0 + for _, lp := range pairs { + if v, ok := want[lp.GetName()]; ok && v == lp.GetValue() { + found++ + } + } + return found == len(want) +}