From 1a60aaf67e93e5537f7d1d1473fdb08d3e73f39e Mon Sep 17 00:00:00 2001 From: John Morales Date: Thu, 5 Mar 2026 22:43:16 -0500 Subject: [PATCH 01/10] WIP Signed-off-by: John Morales --- collector/diskstats_linux.go | 22 +++++++ collector/ext4_linux.go | 110 +++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 collector/ext4_linux.go diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index daca55d968..22d522a7b0 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -247,6 +247,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) { ), valueType: prometheus.GaugeValue, }, }, + ioErrDesc: typedFactorDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"), + "Number of IO commands that completed with an error.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, + ioDoneDesc: typedFactorDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"), + "Number of completed or rejected IO commands.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, logger: logger, } @@ -368,6 +382,14 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { } } } + + ioDeviceStats, err := c.fs.SysBlockDeviceIOStat(dev) + if err != nil && !os.IsNotExist(err) { + c.logger.Debug("Failed to get block device io stats", "device", dev, "err", err) + } + ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) + ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) + } return nil } diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go new file mode 100644 index 0000000000..6ae591b2b5 --- /dev/null +++ b/collector/ext4_linux.go @@ -0,0 +1,110 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noext4 +// +build !noext4 + +package collector + +import ( + "fmt" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/ext4" +) + +// An ext4Collector is a Collector which gathers metrics from ext4 filesystems. +type ext4Collector struct { + fs ext4.FS + logger *slog.Logger +} + +func init() { + registerCollector("ext4", defaultEnabled, NewExt4Collector) +} + +// NewExt4Collector returns a new Collector exposing ext4 statistics. +func NewExt4Collector(logger *slog.Logger) (Collector, error) { + fs, err := ext4.NewFS(*procPath, *sysPath) + if err != nil { + return nil, fmt.Errorf("failed to open sysfs: %w", err) + } + + return &ext4Collector{ + fs: fs, + logger: logger, + }, nil +} + +// Update implements Collector. +func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { + stats, err := c.fs.ProcStat() + if err != nil { + return fmt.Errorf("failed to retrieve ext4 stats: %w", err) + } + + for _, s := range stats { + c.updateExt4Stats(ch, s) + } + + return nil +} + +// updateExt4Stats collects statistics for a single ext4 filesystem. +func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { + const ( + subsystem = "ext4" + ) + var ( + labels = []string{"device"} + ) + + metrics := []struct { + name string + desc string + value float64 + }{ + { + name: "errors", + desc: "Number of ext4 filesystem errors.", + value: float64(s.Errors), + }, + { + name: "warnings", + desc: "Number of ext4 filesystem warnings.", + value: float64(s.Warnings), + }, + { + name: "messages", + desc: "Number of ext4 filesystem log messages.", + value: float64(s.Messages), + }, + } + + for _, m := range metrics { + desc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, m.name), + m.desc, + labels, + nil, + ) + + ch <- prometheus.MustNewConstMetric( + desc, + prometheus.CounterValue, + m.value, + s.Name, + ) + } +} From 27c4f84066fe4da6af2fb16e210b03f7c96389ae Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 17:25:49 -0500 Subject: [PATCH 02/10] Update test cases and sync up definition for typedDesc Signed-off-by: John Morales --- collector/diskstats_linux.go | 6 ++++-- collector/diskstats_linux_test.go | 8 ++++++++ collector/fixtures/e2e-64k-page-output.txt | 9 +++++++++ collector/fixtures/e2e-output.txt | 9 +++++++++ end-to-end-test.sh | 1 + 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index 22d522a7b0..45a3b1794b 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -75,6 +75,8 @@ type diskstatsCollector struct { filesystemInfoDesc typedDesc deviceMapperInfoDesc typedDesc ataDescs map[string]typedDesc + ioErrDesc typedDesc + ioDoneDesc typedDesc logger *slog.Logger getUdevDeviceProperties func(uint32, uint32) (udevInfo, error) } @@ -247,14 +249,14 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) { ), valueType: prometheus.GaugeValue, }, }, - ioErrDesc: typedFactorDesc{ + ioErrDesc: typedDesc{ desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"), "Number of IO commands that completed with an error.", []string{"device"}, nil, ), valueType: prometheus.CounterValue, }, - ioDoneDesc: typedFactorDesc{ + ioDoneDesc: typedDesc{ desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"), "Number of completed or rejected IO commands.", []string{"device"}, diff --git a/collector/diskstats_linux_test.go b/collector/diskstats_linux_test.go index 08a5024c8a..e7175fb7ab 100644 --- a/collector/diskstats_linux_test.go +++ b/collector/diskstats_linux_test.go @@ -178,6 +178,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index b860becad2..7b4c28abfa 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -3062,6 +3070,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 7ac06c0f87..f905a75516 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -586,6 +586,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -3094,6 +3102,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/end-to-end-test.sh b/end-to-end-test.sh index de490bfff8..b9ae331436 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -50,6 +50,7 @@ enabled_collectors=$(cat << COLLECTORS drbd edac entropy + ext4 fibrechannel filefd hwmon From 41cf16b77c5ac84d33c8ae64f4f114bedcc26495 Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 19:24:45 -0500 Subject: [PATCH 03/10] ext4 test fixes Signed-off-by: John Morales --- collector/diskstats_linux.go | 11 ++--- collector/ext4_linux.go | 37 +++++++++-------- collector/ext4_linux_test.go | 69 +++++++++++++++++++++++++++++++ collector/fixtures/sys.ttar | 80 +++++++++++++++++++++++++++++------- 4 files changed, 161 insertions(+), 36 deletions(-) create mode 100644 collector/ext4_linux_test.go diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index 45a3b1794b..c9a31238e6 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -385,12 +385,13 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { } } - ioDeviceStats, err := c.fs.SysBlockDeviceIOStat(dev) - if err != nil && !os.IsNotExist(err) { - c.logger.Debug("Failed to get block device io stats", "device", dev, "err", err) + ioDeviceStats, ioErr := c.fs.SysBlockDeviceIOStat(dev) + if ioErr == nil { + ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) + ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) + } else if !os.IsNotExist(ioErr) { + c.logger.Info("Failed to get block device io stats", "device", dev, "err", ioErr) } - ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) - ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) } return nil diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index 6ae591b2b5..a7b1563005 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -61,37 +61,42 @@ func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { return nil } -// updateExt4Stats collects statistics for a single ext4 filesystem. -func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { - const ( - subsystem = "ext4" - ) - var ( - labels = []string{"device"} - ) +type ext4Metric struct { + name string + desc string + value float64 +} - metrics := []struct { - name string - desc string - value float64 - }{ +func (c *ext4Collector) getMetrics(s *ext4.Stats) []ext4Metric { + return []ext4Metric{ { - name: "errors", + name: "errors_total", desc: "Number of ext4 filesystem errors.", value: float64(s.Errors), }, { - name: "warnings", + name: "warnings_total", desc: "Number of ext4 filesystem warnings.", value: float64(s.Warnings), }, { - name: "messages", + name: "messages_total", desc: "Number of ext4 filesystem log messages.", value: float64(s.Messages), }, } +} + +// updateExt4Stats collects statistics for a single ext4 filesystem. +func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { + const ( + subsystem = "ext4" + ) + var ( + labels = []string{"device"} + ) + metrics := c.getMetrics(s) for _, m := range metrics { desc := prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, m.name), diff --git a/collector/ext4_linux_test.go b/collector/ext4_linux_test.go new file mode 100644 index 0000000000..8ec32232e1 --- /dev/null +++ b/collector/ext4_linux_test.go @@ -0,0 +1,69 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noext4 + +package collector + +import ( + "testing" + + "github.com/prometheus/procfs" + "github.com/prometheus/procfs/ext4" +) + +var expectedExt4Metrics = [][]ext4Metric{ + { + {name: "errors_total", value: 12}, + {name: "warnings_total", value: 34}, + {name: "messages_total", value: 567}, + }, +} + +func checkExt4Metric(exp, got *ext4Metric) bool { + if exp.name != got.name || + exp.value != got.value { + return false + } + return true +} + +func TestExt4(t *testing.T) { + fs, err := ext4.NewFS(procfs.DefaultMountPoint, "fixtures/sys") + if err != nil { + t.Fatal(err) + } + collector := &ext4Collector{fs: fs} + + stats, err := collector.fs.ProcStat() + if err != nil { + t.Fatalf("Failed to retrieve ext4 stats: %v", err) + } + if len(stats) != len(expectedExt4Metrics) { + t.Fatalf("Unexpected number of ext4 stats: expected %v, got %v", len(expectedExt4Metrics), len(stats)) + } + + for i, s := range stats { + metrics := collector.getMetrics(s) + if len(metrics) != len(expectedExt4Metrics[i]) { + t.Fatalf("Unexpected number of ext4 metrics: expected %v, got %v", len(expectedExt4Metrics[i]), len(metrics)) + } + + for j, m := range metrics { + exp := expectedExt4Metrics[i][j] + if !checkExt4Metric(&exp, &m) { + t.Errorf("Incorrect ext4 metric: expected %#v, got: %#v", exp, m) + } + } + } +} diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index f8a453fe37..4704c508cb 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -595,6 +595,19 @@ Mode: 644 Directory: sys/block/sda Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sda/device +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/iodone_cnt +Lines: 1 +307 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/ioerr_cnt +Lines: 1 +3 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/block/sda/queue Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -826,6 +839,22 @@ Lines: 1 none Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0/device +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/iodone_cnt +Lines: 1 +2767 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/ioerr_cnt +Lines: 1 +29 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2219,16 +2248,6 @@ Lines: 1 Samsung SSD 970 PRO 512GB Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/serial -Lines: 1 -S680HF8N190894I -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/state -Lines: 1 -live -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/nvme/nvme0/nvme0c0n0 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2237,11 +2256,6 @@ Lines: 1 optimized Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/nvme0c0n0/size -Lines: 1 -3906250000 -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/nvme/nvme0/nvme0c0n0/nuse Lines: 1 488281250 @@ -2255,6 +2269,21 @@ Lines: 1 4096 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/nvme0c0n0/size +Lines: 1 +3906250000 +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/serial +Lines: 1 +S680HF8N190894I +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/state +Lines: 1 +live +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/power_supply Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -10247,6 +10276,27 @@ Lines: 1 4096 Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/fs/ext4 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/fs/ext4/sdb1 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/errors_count +Lines: 1 +12 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/msg_count +Lines: 1 +567 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/warning_count +Lines: 1 +34 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/fs/xfs Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From e77b8e8048efb0852f67df60e8166efd5456323c Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 19:33:27 -0500 Subject: [PATCH 04/10] Both ext4 and diskstats tests passing with hex encoded fixtures for diskstats Signed-off-by: John Morales --- collector/diskstats_linux_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/diskstats_linux_test.go b/collector/diskstats_linux_test.go index e7175fb7ab..cbcbc8bf61 100644 --- a/collector/diskstats_linux_test.go +++ b/collector/diskstats_linux_test.go @@ -180,12 +180,12 @@ node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 # HELP node_disk_iodone_total Number of completed or rejected IO commands. # TYPE node_disk_iodone_total counter -node_disk_iodone_total{device="sda"} 307 -node_disk_iodone_total{device="sr0"} 2767 +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 # HELP node_disk_ioerr_total Number of IO commands that completed with an error. # TYPE node_disk_ioerr_total counter -node_disk_ioerr_total{device="sda"} 3 -node_disk_ioerr_total{device="sr0"} 29 +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 From 5471764dca5f90a557571807dc13f0d82834f8b3 Mon Sep 17 00:00:00 2001 From: John Morales Date: Fri, 6 Mar 2026 19:44:34 -0500 Subject: [PATCH 05/10] update sys.ttar for hex fixture values Signed-off-by: John Morales --- collector/fixtures/sys.ttar | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 4704c508cb..f0d8fd2465 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -600,12 +600,12 @@ Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sda/device/iodone_cnt Lines: 1 -307 +0x307 Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sda/device/ioerr_cnt Lines: 1 -3 +0xb Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/block/sda/queue @@ -847,12 +847,12 @@ Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sr0/device/iodone_cnt Lines: 1 -2767 +0x7b6ffad Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/block/sr0/device/ioerr_cnt Lines: 1 -29 +0x29 Mode: 664 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus From 270617a272de1e3d8eba25e2de6f31618621d350 Mon Sep 17 00:00:00 2001 From: John Morales Date: Sat, 7 Mar 2026 21:32:21 -0500 Subject: [PATCH 06/10] Add device filters flags for ext4 Signed-off-by: John Morales --- collector/diskstats_linux.go | 2 +- collector/ext4_linux.go | 48 +++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index c9a31238e6..ea74b1ea28 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -390,7 +390,7 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) } else if !os.IsNotExist(ioErr) { - c.logger.Info("Failed to get block device io stats", "device", dev, "err", ioErr) + c.logger.Debug("Failed to get block device io stats", "device", dev, "err", ioErr) } } diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index a7b1563005..a403d5b233 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -17,33 +17,70 @@ package collector import ( + "errors" "fmt" "log/slog" + "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs/ext4" ) +var ( + ext4DeviceExclude = kingpin.Flag( + "collector.ext4.device-exclude", + "Regexp of ext4 devices to exclude (mutually exclusive to device-include).", + ).String() + + ext4DeviceInclude = kingpin.Flag( + "collector.ext4.device-include", + "Regexp of ext4 devices to include (mutually exclusive to device-exclude).", + ).String() +) + // An ext4Collector is a Collector which gathers metrics from ext4 filesystems. type ext4Collector struct { - fs ext4.FS - logger *slog.Logger + deviceFilter deviceFilter + fs ext4.FS + logger *slog.Logger } func init() { registerCollector("ext4", defaultEnabled, NewExt4Collector) } +func newExt4DeviceFilter(logger *slog.Logger) (deviceFilter, error) { + if *ext4DeviceExclude != "" && *ext4DeviceInclude != "" { + return deviceFilter{}, errors.New("device-exclude & device-include are mutually exclusive") + } + + if *ext4DeviceExclude != "" { + logger.Info("Parsed flag --collector.ext4.device-exclude", "flag", *ext4DeviceExclude) + } + + if *ext4DeviceInclude != "" { + logger.Info("Parsed Flag --collector.ext4.device-include", "flag", *ext4DeviceInclude) + } + + return newDeviceFilter(*ext4DeviceExclude, *ext4DeviceInclude), nil +} + // NewExt4Collector returns a new Collector exposing ext4 statistics. func NewExt4Collector(logger *slog.Logger) (Collector, error) { + ext4DeviceFilter, err := newExt4DeviceFilter(logger) + if err != nil { + return nil, fmt.Errorf("failed to parse device filter flags: %w", err) + } + fs, err := ext4.NewFS(*procPath, *sysPath) if err != nil { return nil, fmt.Errorf("failed to open sysfs: %w", err) } return &ext4Collector{ - fs: fs, - logger: logger, + deviceFilter: ext4DeviceFilter, + fs: fs, + logger: logger, }, nil } @@ -55,6 +92,9 @@ func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { } for _, s := range stats { + if c.deviceFilter.ignored(s.Name) { + continue + } c.updateExt4Stats(ch, s) } From e630ba43d97f6dc47088e97eb637c9e816c44fdb Mon Sep 17 00:00:00 2001 From: John Morales Date: Sat, 7 Mar 2026 22:51:12 -0500 Subject: [PATCH 07/10] Default-exclude the ext4 'features' meta device Signed-off-by: John Morales --- collector/ext4_linux.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index a403d5b233..96f45a05e1 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -26,11 +26,15 @@ import ( "github.com/prometheus/procfs/ext4" ) +const ( + ext4DefaultIgnoredDevices = "^features$" +) + var ( ext4DeviceExclude = kingpin.Flag( "collector.ext4.device-exclude", "Regexp of ext4 devices to exclude (mutually exclusive to device-include).", - ).String() + ).Default(ext4DefaultIgnoredDevices).String() ext4DeviceInclude = kingpin.Flag( "collector.ext4.device-include", From 30aa9502a12435c6bce940f9d70dc6e9c19218d9 Mon Sep 17 00:00:00 2001 From: John Morales Date: Sun, 8 Mar 2026 18:44:22 -0400 Subject: [PATCH 08/10] e2e test fixture fixes Signed-off-by: John Morales --- collector/fixtures/e2e-output.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index f905a75516..0578e58bbb 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -588,12 +588,12 @@ node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 # HELP node_disk_iodone_total Number of completed or rejected IO commands. # TYPE node_disk_iodone_total counter -node_disk_iodone_total{device="sda"} 307 -node_disk_iodone_total{device="sr0"} 2767 +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 # HELP node_disk_ioerr_total Number of IO commands that completed with an error. # TYPE node_disk_ioerr_total counter -node_disk_ioerr_total{device="sda"} 3 -node_disk_ioerr_total{device="sr0"} 29 +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -802,6 +802,15 @@ node_entropy_available_bits 1337 node_entropy_pool_size_bits 4096 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build. # TYPE node_exporter_build_info gauge +# HELP node_ext4_errors_total Number of ext4 filesystem errors. +# TYPE node_ext4_errors_total counter +node_ext4_errors_total{device="sdb1"} 12 +# HELP node_ext4_messages_total Number of ext4 filesystem log messages. +# TYPE node_ext4_messages_total counter +node_ext4_messages_total{device="sdb1"} 567 +# HELP node_ext4_warnings_total Number of ext4 filesystem warnings. +# TYPE node_ext4_warnings_total counter +node_ext4_warnings_total{device="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 From 42045f2d322624c1752ad8aca8f59a571096db68 Mon Sep 17 00:00:00 2001 From: John Morales Date: Sun, 8 Mar 2026 18:52:05 -0400 Subject: [PATCH 09/10] e2e-64k-page-output.txt fixture update Signed-off-by: John Morales --- collector/fixtures/e2e-64k-page-output.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 7b4c28abfa..b7d959f0fe 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -556,12 +556,12 @@ node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 # HELP node_disk_iodone_total Number of completed or rejected IO commands. # TYPE node_disk_iodone_total counter -node_disk_iodone_total{device="sda"} 307 -node_disk_iodone_total{device="sr0"} 2767 +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 # HELP node_disk_ioerr_total Number of IO commands that completed with an error. # TYPE node_disk_ioerr_total counter -node_disk_ioerr_total{device="sda"} 3 -node_disk_ioerr_total{device="sr0"} 29 +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -770,6 +770,15 @@ node_entropy_available_bits 1337 node_entropy_pool_size_bits 4096 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build. # TYPE node_exporter_build_info gauge +# HELP node_ext4_errors_total Number of ext4 filesystem errors. +# TYPE node_ext4_errors_total counter +node_ext4_errors_total{device="sdb1"} 12 +# HELP node_ext4_messages_total Number of ext4 filesystem log messages. +# TYPE node_ext4_messages_total counter +node_ext4_messages_total{device="sdb1"} 567 +# HELP node_ext4_warnings_total Number of ext4 filesystem warnings. +# TYPE node_ext4_warnings_total counter +node_ext4_warnings_total{device="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 From d2e264a92c510fec9bd4150e844d26bc3cc8d337 Mon Sep 17 00:00:00 2001 From: John Morales Date: Mon, 9 Mar 2026 15:22:52 -0400 Subject: [PATCH 10/10] feedback: update ext4 collector for more consistent label naming partition instead of device Signed-off-by: John Morales --- collector/ext4_linux.go | 54 +++++++++++----------- collector/fixtures/e2e-64k-page-output.txt | 6 +-- collector/fixtures/e2e-output.txt | 6 +-- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go index 96f45a05e1..db418fb05a 100644 --- a/collector/ext4_linux.go +++ b/collector/ext4_linux.go @@ -27,53 +27,53 @@ import ( ) const ( - ext4DefaultIgnoredDevices = "^features$" + ext4DefaultIgnoredPartitions = "^features$" ) var ( - ext4DeviceExclude = kingpin.Flag( - "collector.ext4.device-exclude", - "Regexp of ext4 devices to exclude (mutually exclusive to device-include).", - ).Default(ext4DefaultIgnoredDevices).String() - - ext4DeviceInclude = kingpin.Flag( - "collector.ext4.device-include", - "Regexp of ext4 devices to include (mutually exclusive to device-exclude).", + ext4PartitionExclude = kingpin.Flag( + "collector.ext4.partition-exclude", + "Regexp of ext4 partitions to exclude (mutually exclusive to partition-include).", + ).Default(ext4DefaultIgnoredPartitions).String() + + ext4PartitionInclude = kingpin.Flag( + "collector.ext4.partition-include", + "Regexp of ext4 partitions to include (mutually exclusive to partition-exclude).", ).String() ) // An ext4Collector is a Collector which gathers metrics from ext4 filesystems. type ext4Collector struct { - deviceFilter deviceFilter - fs ext4.FS - logger *slog.Logger + partitionFilter deviceFilter + fs ext4.FS + logger *slog.Logger } func init() { registerCollector("ext4", defaultEnabled, NewExt4Collector) } -func newExt4DeviceFilter(logger *slog.Logger) (deviceFilter, error) { - if *ext4DeviceExclude != "" && *ext4DeviceInclude != "" { - return deviceFilter{}, errors.New("device-exclude & device-include are mutually exclusive") +func newExt4PartitionFilter(logger *slog.Logger) (deviceFilter, error) { + if *ext4PartitionExclude != "" && *ext4PartitionInclude != "" { + return deviceFilter{}, errors.New("partition-exclude & partition-include are mutually exclusive") } - if *ext4DeviceExclude != "" { - logger.Info("Parsed flag --collector.ext4.device-exclude", "flag", *ext4DeviceExclude) + if *ext4PartitionExclude != "" { + logger.Info("Parsed flag --collector.ext4.partition-exclude", "flag", *ext4PartitionExclude) } - if *ext4DeviceInclude != "" { - logger.Info("Parsed Flag --collector.ext4.device-include", "flag", *ext4DeviceInclude) + if *ext4PartitionInclude != "" { + logger.Info("Parsed Flag --collector.ext4.partition-include", "flag", *ext4PartitionInclude) } - return newDeviceFilter(*ext4DeviceExclude, *ext4DeviceInclude), nil + return newDeviceFilter(*ext4PartitionExclude, *ext4PartitionInclude), nil } // NewExt4Collector returns a new Collector exposing ext4 statistics. func NewExt4Collector(logger *slog.Logger) (Collector, error) { - ext4DeviceFilter, err := newExt4DeviceFilter(logger) + ext4PartitionFilter, err := newExt4PartitionFilter(logger) if err != nil { - return nil, fmt.Errorf("failed to parse device filter flags: %w", err) + return nil, fmt.Errorf("failed to parse partition filter flags: %w", err) } fs, err := ext4.NewFS(*procPath, *sysPath) @@ -82,9 +82,9 @@ func NewExt4Collector(logger *slog.Logger) (Collector, error) { } return &ext4Collector{ - deviceFilter: ext4DeviceFilter, - fs: fs, - logger: logger, + partitionFilter: ext4PartitionFilter, + fs: fs, + logger: logger, }, nil } @@ -96,7 +96,7 @@ func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { } for _, s := range stats { - if c.deviceFilter.ignored(s.Name) { + if c.partitionFilter.ignored(s.Name) { continue } c.updateExt4Stats(ch, s) @@ -137,7 +137,7 @@ func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Sta subsystem = "ext4" ) var ( - labels = []string{"device"} + labels = []string{"partition"} ) metrics := c.getMetrics(s) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index b7d959f0fe..eade5b9ce3 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -772,13 +772,13 @@ node_entropy_pool_size_bits 4096 # TYPE node_exporter_build_info gauge # HELP node_ext4_errors_total Number of ext4 filesystem errors. # TYPE node_ext4_errors_total counter -node_ext4_errors_total{device="sdb1"} 12 +node_ext4_errors_total{partition="sdb1"} 12 # HELP node_ext4_messages_total Number of ext4 filesystem log messages. # TYPE node_ext4_messages_total counter -node_ext4_messages_total{device="sdb1"} 567 +node_ext4_messages_total{partition="sdb1"} 567 # HELP node_ext4_warnings_total Number of ext4 filesystem warnings. # TYPE node_ext4_warnings_total counter -node_ext4_warnings_total{device="sdb1"} 34 +node_ext4_warnings_total{partition="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 0578e58bbb..188eefb925 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -804,13 +804,13 @@ node_entropy_pool_size_bits 4096 # TYPE node_exporter_build_info gauge # HELP node_ext4_errors_total Number of ext4 filesystem errors. # TYPE node_ext4_errors_total counter -node_ext4_errors_total{device="sdb1"} 12 +node_ext4_errors_total{partition="sdb1"} 12 # HELP node_ext4_messages_total Number of ext4 filesystem log messages. # TYPE node_ext4_messages_total counter -node_ext4_messages_total{device="sdb1"} 567 +node_ext4_messages_total{partition="sdb1"} 567 # HELP node_ext4_warnings_total Number of ext4 filesystem warnings. # TYPE node_ext4_warnings_total counter -node_ext4_warnings_total{device="sdb1"} 34 +node_ext4_warnings_total{partition="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0