diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index daca55d968..ea74b1ea28 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -75,6 +75,8 @@ type diskstatsCollector struct { filesystemInfoDesc typedDesc deviceMapperInfoDesc typedDesc ataDescs map[string]typedDesc + ioErrDesc typedDesc + ioDoneDesc typedDesc logger *slog.Logger getUdevDeviceProperties func(uint32, uint32) (udevInfo, error) } @@ -247,6 +249,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) { ), valueType: prometheus.GaugeValue, }, }, + ioErrDesc: typedDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"), + "Number of IO commands that completed with an error.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, + ioDoneDesc: typedDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"), + "Number of completed or rejected IO commands.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, logger: logger, } @@ -368,6 +384,15 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { } } } + + ioDeviceStats, ioErr := c.fs.SysBlockDeviceIOStat(dev) + if ioErr == nil { + ch <- c.ioErrDesc.mustNewConstMetric(float64(ioDeviceStats.IOErrCount), dev) + ch <- c.ioDoneDesc.mustNewConstMetric(float64(ioDeviceStats.IODoneCount), dev) + } else if !os.IsNotExist(ioErr) { + c.logger.Debug("Failed to get block device io stats", "device", dev, "err", ioErr) + } + } return nil } diff --git a/collector/diskstats_linux_test.go b/collector/diskstats_linux_test.go index 08a5024c8a..cbcbc8bf61 100644 --- a/collector/diskstats_linux_test.go +++ b/collector/diskstats_linux_test.go @@ -178,6 +178,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go new file mode 100644 index 0000000000..db418fb05a --- /dev/null +++ b/collector/ext4_linux.go @@ -0,0 +1,159 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+//go:build !noext4
+// +build !noext4
+
+package collector
+
+import (
+	"errors"
+	"fmt"
+	"log/slog"
+
+	"github.com/alecthomas/kingpin/v2"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/procfs/ext4"
+)
+
+const ext4DefaultIgnoredPartitions = "^features$"
+
+var (
+	ext4PartitionExclude = kingpin.Flag(
+		"collector.ext4.partition-exclude",
+		"Regexp of ext4 partitions to exclude (mutually exclusive to partition-include).",
+	).Default(ext4DefaultIgnoredPartitions).String()
+
+	ext4PartitionInclude = kingpin.Flag(
+		"collector.ext4.partition-include",
+		"Regexp of ext4 partitions to include (mutually exclusive to partition-exclude).",
+	).String()
+)
+
+// An ext4Collector is a Collector which gathers metrics from ext4 filesystems.
+type ext4Collector struct {
+	partitionFilter deviceFilter
+	fs              ext4.FS
+	logger          *slog.Logger
+}
+
+func init() {
+	registerCollector("ext4", defaultEnabled, NewExt4Collector)
+}
+
+// newExt4PartitionFilter turns the exclude/include flags into a deviceFilter; setting both is an error.
+func newExt4PartitionFilter(logger *slog.Logger) (deviceFilter, error) {
+	exclude, include := *ext4PartitionExclude, *ext4PartitionInclude
+	if include != "" && exclude == ext4DefaultIgnoredPartitions {
+		exclude = "" // an include-only setup must not trip over the built-in exclude default
+	}
+	if exclude != "" && include != "" {
+		return deviceFilter{}, errors.New("partition-exclude & partition-include are mutually exclusive")
+	}
+	if exclude != "" {
+		logger.Info("Parsed flag --collector.ext4.partition-exclude", "flag", exclude)
+	}
+	if include != "" {
+		logger.Info("Parsed Flag --collector.ext4.partition-include", "flag", include)
+	}
+	return newDeviceFilter(exclude, include), nil
+}
+
+// NewExt4Collector returns a new Collector exposing ext4 statistics.
+func NewExt4Collector(logger *slog.Logger) (Collector, error) {
+	filter, err := newExt4PartitionFilter(logger)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse partition filter flags: %w", err)
+	}
+
+	extFS, err := ext4.NewFS(*procPath, *sysPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open sysfs: %w", err)
+	}
+
+	c := &ext4Collector{partitionFilter: filter, fs: extFS, logger: logger}
+	return c, nil
+}
+
+// Update implements Collector. It fetches the ext4 stats via ProcStat and emits
+// metrics for every entry whose Name the partition filter does not ignore.
+func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error {
+	allStats, err := c.fs.ProcStat()
+	if err != nil {
+		return fmt.Errorf("failed to retrieve ext4 stats: %w", err)
+	}
+
+	for _, st := range allStats {
+		if !c.partitionFilter.ignored(st.Name) {
+			c.updateExt4Stats(ch, st)
+		}
+	}
+
+	return nil
+}
+
+// ext4Metric is a single ext4 sample: metric name suffix, help text and value.
+type ext4Metric struct {
+	name  string
+	desc  string
+	value float64
+}
+
+// getMetrics maps the Errors, Warnings and Messages fields of s to metrics,
+// returned in that order.
+func (c *ext4Collector) getMetrics(s *ext4.Stats) []ext4Metric {
+	errs := ext4Metric{
+		name:  "errors_total",
+		desc:  "Number of ext4 filesystem errors.",
+		value: float64(s.Errors),
+	}
+	warns := ext4Metric{
+		name:  "warnings_total",
+		desc:  "Number of ext4 filesystem warnings.",
+		value: float64(s.Warnings),
+	}
+	msgs := ext4Metric{
+		name:  "messages_total",
+		desc:  "Number of ext4 filesystem log messages.",
+		value: float64(s.Messages),
+	}
+
+	return []ext4Metric{errs, warns, msgs}
+}
+
+// updateExt4Stats collects statistics for a single ext4 filesystem.
+// Each entry returned by getMetrics becomes one counter sample sent on ch. The
+// metric name is built from namespace, the "ext4" subsystem and the entry name;
+// the only label, "partition", carries s.Name.
+func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) {
+	const subsystem = "ext4"
+	labelNames := []string{"partition"}
+
+	for _, metric := range c.getMetrics(s) {
+		// The descriptor is rebuilt for every entry from its name and help text.
+		desc := prometheus.NewDesc(
+			prometheus.BuildFQName(namespace, subsystem, metric.name),
+			metric.desc,
+			labelNames,
+			nil,
+		)
+
+		sample := prometheus.MustNewConstMetric(
+			desc,
+			prometheus.CounterValue,
+			metric.value,
+			s.Name,
+		)
+		ch <- sample
+	}
+}
diff --git a/collector/ext4_linux_test.go b/collector/ext4_linux_test.go
new file mode 100644
index 0000000000..8ec32232e1
--- /dev/null
+++ b/collector/ext4_linux_test.go
@@ -0,0 +1,69 @@
+// Copyright 2019 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !noext4
+
+package collector
+
+import (
+	"testing"
+
+	"github.com/prometheus/procfs"
+	"github.com/prometheus/procfs/ext4"
+)
+
+// expectedExt4Metrics holds, per entry returned by ProcStat, the metrics that
+// getMetrics should produce, in order. Only name and value are set.
+var expectedExt4Metrics = [][]ext4Metric{
+	{
+		{name: "errors_total", value: 12},
+		{name: "warnings_total", value: 34},
+		{name: "messages_total", value: 567},
+	},
+}
+
+// checkExt4Metric reports whether got has the same name and value as exp; the
+// desc field is not compared.
+func checkExt4Metric(exp, got *ext4Metric) bool {
+	return exp.name == got.name && exp.value == got.value
+}
+
+func TestExt4(t *testing.T) {
+	extFS, err := ext4.NewFS(procfs.DefaultMountPoint, "fixtures/sys")
+	if err != nil {
+		t.Fatal(err)
+	}
+	c := &ext4Collector{fs: extFS}
+
+	allStats, err := c.fs.ProcStat()
+	if err != nil {
+		t.Fatalf("Failed to retrieve ext4 stats: %v", err)
+	}
+	if want, got := len(expectedExt4Metrics), len(allStats); got != want {
+		t.Fatalf("Unexpected number of ext4 stats: expected %v, got %v", want, got)
+	}
+
+	for i, st := range allStats {
+		want := expectedExt4Metrics[i]
+		got := c.getMetrics(st)
+		if len(got) != len(want) {
+			t.Fatalf("Unexpected number of ext4 metrics: expected %v, got %v", len(want), len(got))
+		}
+
+		for j := range got {
+			if !checkExt4Metric(&want[j], &got[j]) {
+				t.Errorf("Incorrect ext4 metric: expected %#v, got: %#v", want[j], got[j])
+			}
+		}
+	}
+}
diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt
index b860becad2..eade5b9ce3 100644
--- a/collector/fixtures/e2e-64k-page-output.txt
+++ b/collector/fixtures/e2e-64k-page-output.txt
@@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
 node_disk_io_time_weighted_seconds_total{device="sr0"} 0
 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
+# HELP node_disk_iodone_total Number of completed or rejected IO commands.
+# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -762,6 +770,15 @@ node_entropy_available_bits 1337 node_entropy_pool_size_bits 4096 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build. # TYPE node_exporter_build_info gauge +# HELP node_ext4_errors_total Number of ext4 filesystem errors. +# TYPE node_ext4_errors_total counter +node_ext4_errors_total{partition="sdb1"} 12 +# HELP node_ext4_messages_total Number of ext4 filesystem log messages. +# TYPE node_ext4_messages_total counter +node_ext4_messages_total{partition="sdb1"} 567 +# HELP node_ext4_warnings_total Number of ext4 filesystem warnings. 
+# TYPE node_ext4_warnings_total counter +node_ext4_warnings_total{partition="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 @@ -3062,6 +3079,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 7ac06c0f87..188eefb925 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -586,6 +586,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 775 +node_disk_iodone_total{device="sr0"} 1.29433517e+08 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 11 +node_disk_ioerr_total{device="sr0"} 41 # HELP node_disk_read_bytes_total The total number of bytes read successfully. 
# TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -794,6 +802,15 @@ node_entropy_available_bits 1337 node_entropy_pool_size_bits 4096 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which node_exporter was built, and the goos and goarch for the build. # TYPE node_exporter_build_info gauge +# HELP node_ext4_errors_total Number of ext4 filesystem errors. +# TYPE node_ext4_errors_total counter +node_ext4_errors_total{partition="sdb1"} 12 +# HELP node_ext4_messages_total Number of ext4 filesystem log messages. +# TYPE node_ext4_messages_total counter +node_ext4_messages_total{partition="sdb1"} 567 +# HELP node_ext4_warnings_total Number of ext4 filesystem warnings. +# TYPE node_ext4_warnings_total counter +node_ext4_warnings_total{partition="sdb1"} 34 # HELP node_fibrechannel_dumped_frames_total Number of dumped frames # TYPE node_fibrechannel_dumped_frames_total counter node_fibrechannel_dumped_frames_total{fc_host="host1"} 0 @@ -3094,6 +3111,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index f8a453fe37..f0d8fd2465 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -595,6 +595,19 @@ Mode: 644 Directory: sys/block/sda Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sda/device +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/iodone_cnt +Lines: 1 +0x307 
+Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/ioerr_cnt +Lines: 1 +0xb +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/block/sda/queue Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -826,6 +839,22 @@ Lines: 1 none Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0/device +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/iodone_cnt +Lines: 1 +0x7b6ffad +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/ioerr_cnt +Lines: 1 +0x29 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2219,16 +2248,6 @@ Lines: 1 Samsung SSD 970 PRO 512GB Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/serial -Lines: 1 -S680HF8N190894I -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/state -Lines: 1 -live -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/nvme/nvme0/nvme0c0n0 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2237,11 +2256,6 @@ Lines: 1 optimized Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Path: sys/class/nvme/nvme0/nvme0c0n0/size -Lines: 1 -3906250000 -Mode: 444 -# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: 
sys/class/nvme/nvme0/nvme0c0n0/nuse Lines: 1 488281250 @@ -2255,6 +2269,21 @@ Lines: 1 4096 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/nvme0c0n0/size +Lines: 1 +3906250000 +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/serial +Lines: 1 +S680HF8N190894I +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/nvme/nvme0/state +Lines: 1 +live +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/power_supply Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -10247,6 +10276,27 @@ Lines: 1 4096 Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/fs/ext4 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/fs/ext4/sdb1 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/errors_count +Lines: 1 +12 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/msg_count +Lines: 1 +567 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/fs/ext4/sdb1/warning_count +Lines: 1 +34 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/fs/xfs Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/end-to-end-test.sh b/end-to-end-test.sh index de490bfff8..b9ae331436 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -50,6 +50,7 @@ enabled_collectors=$(cat << COLLECTORS drbd edac entropy + ext4 fibrechannel filefd hwmon