diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index db384df620..edfd4d8f2e 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2177,6 +2177,11 @@ node_mountstats_nfs_event_vfs_write_pages_total{export="192.168.1.1:/srv/test",m # TYPE node_mountstats_nfs_event_write_extension_total counter node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +# HELP node_mountstats_nfs_mountpoint_info Info metric for an NFS mountpoint. +# TYPE node_mountstats_nfs_mountpoint_info gauge +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test",protocol="tcp"} 1 +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="tcp"} 1 +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="udp"} 1 # HELP node_mountstats_nfs_operations_major_timeouts_total Number of times a request has had a major timeout for a given operation. # TYPE node_mountstats_nfs_operations_major_timeouts_total counter node_mountstats_nfs_operations_major_timeouts_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",operation="ACCESS",protocol="udp"} 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 9d59cab31e..4773542091 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2209,6 +2209,11 @@ node_mountstats_nfs_event_vfs_write_pages_total{export="192.168.1.1:/srv/test",m # TYPE node_mountstats_nfs_event_write_extension_total counter node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +# HELP node_mountstats_nfs_mountpoint_info Info metric for an NFS mountpoint. +# TYPE node_mountstats_nfs_mountpoint_info gauge +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test",protocol="tcp"} 1 +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="tcp"} 1 +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="udp"} 1 # HELP node_mountstats_nfs_operations_major_timeouts_total Number of times a request has had a major timeout for a given operation. # TYPE node_mountstats_nfs_operations_major_timeouts_total counter node_mountstats_nfs_operations_major_timeouts_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",operation="ACCESS",protocol="udp"} 0 diff --git a/collector/fixtures/proc/10/mountinfo b/collector/fixtures/proc/10/mountinfo index 5ab70c2b83..74a316f352 100644 --- a/collector/fixtures/proc/10/mountinfo +++ b/collector/fixtures/proc/10/mountinfo @@ -3,5 +3,5 @@ 17 21 0:4 / /proc rw,nosuid,nodev,noexec,relatime shared:12 - proc proc rw 21 0 8:1 / / rw,relatime shared:1 - ext4 /dev/sda1 rw,errors=remount-ro,data=ordered 194 21 0:42 / /mnt/nfs/test rw shared:144 - nfs4 192.168.1.1:/srv/test rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,addr=192.168.1.1,local_lock=none -177 21 0:42 / /mnt/nfs/test rw shared:130 - nfs4 192.168.1.1:/srv/test rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,addr=192.168.1.1,local_lock=none -1398 798 0:44 / /mnt/nfs/test rw,relatime shared:1154 - nfs 192.168.1.1:/srv/test rw,vers=3,rsize=32768,wsize=32768,namlen=255,hard,proto=udp,timeo=11,retrans=3,sec=sys,mountaddr=192.168.1.1,mountvers=3,mountport=49602,mountproto=udp,local_lock=none,addr=192.168.1.1 +177 21 0:42 / /mnt/nfs/test-dupe rw shared:130 - nfs4 192.168.1.1:/srv/test rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,addr=192.168.1.1,local_lock=none +1398 798 0:44 / /mnt/nfs/test-dupe rw,relatime shared:1154 - nfs 192.168.1.1:/srv/test rw,vers=3,rsize=32768,wsize=32768,namlen=255,hard,proto=udp,timeo=11,retrans=3,sec=sys,mountaddr=192.168.1.1,mountvers=3,mountport=49602,mountproto=udp,local_lock=none,addr=192.168.1.1 diff --git a/collector/mountstats_linux.go b/collector/mountstats_linux.go index 1c3b9a99e6..c71e6e95e1 100644 --- a/collector/mountstats_linux.go +++ b/collector/mountstats_linux.go @@ -32,6 +32,7 @@ var ( type mountStatsCollector struct { // General statistics NFSAgeSecondsTotal *prometheus.Desc + NFSMountpointInfo *prometheus.Desc // Byte statistics NFSReadBytesTotal *prometheus.Desc @@ -105,6 +106,13 @@ type nfsDeviceIdentifier struct { MountAddress string } +type nfsMountpointIdentifier struct { + Device string + Protocol string + MountAddress string + MountPoint string +} + func init() { registerCollector("mountstats", defaultDisabled, NewMountStatsCollector) } @@ -127,9 +135,10 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { ) var ( - labels = []string{"export", "protocol", "mountaddr"} - opLabels = []string{"export", "protocol", "mountaddr", "operation"} - translabels = []string{"export", "protocol", "mountaddr", "transport"} + labels = []string{"export", "protocol", "mountaddr"} + infoLabels = []string{"export", "protocol", "mountaddr", "mountpoint"} + opLabels = []string{"export", "protocol", "mountaddr", "operation"} + transportLabels = []string{"export", "protocol", "mountaddr", "transport"} ) return &mountStatsCollector{ @@ -140,6 +149,13 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { nil, ), + NFSMountpointInfo: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "mountpoint_info"), + "Info metric for an NFS mountpoint.", + infoLabels, + nil, + ), + NFSReadBytesTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "read_bytes_total"), "Number of bytes read using the read() syscall.", @@ -199,70 +215,70 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { NFSTransportBindTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_bind_total"), "Number of times the client has had to establish a connection from scratch to the NFS server.", - translabels, + transportLabels, nil, ), NFSTransportConnectTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_connect_total"), "Number of times the client has made a TCP connection to the NFS server.", - translabels, + transportLabels, nil, ), NFSTransportIdleTimeSeconds: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_idle_time_seconds"), "Duration since the NFS mount last saw any RPC traffic, in seconds.", - translabels, + transportLabels, nil, ), NFSTransportSendsTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_sends_total"), "Number of RPC requests for this mount sent to the NFS server.", - translabels, + transportLabels, nil, ), NFSTransportReceivesTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_receives_total"), "Number of RPC responses for this mount received from the NFS server.", - translabels, + transportLabels, nil, ), NFSTransportBadTransactionIDsTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_bad_transaction_ids_total"), "Number of times the NFS server sent a response with a transaction ID unknown to this client.", - translabels, + transportLabels, nil, ), NFSTransportBacklogQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_backlog_queue_total"), "Total number of items added to the RPC backlog queue.", - translabels, + transportLabels, nil, ), NFSTransportMaximumRPCSlots: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_maximum_rpc_slots"), "Maximum number of simultaneously active RPC requests ever used.", - translabels, + transportLabels, nil, ), NFSTransportSendingQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_sending_queue_total"), "Total number of items added to the RPC transmission sending queue.", - translabels, + transportLabels, nil, ), NFSTransportPendingQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_pending_queue_total"), "Total number of items added to the RPC transmission pending queue.", - translabels, + transportLabels, nil, ), @@ -522,6 +538,7 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error { // store all seen nfsDeviceIdentifiers for deduplication deviceList := make(map[nfsDeviceIdentifier]bool) + mountpointList := make(map[nfsMountpointIdentifier]bool) for idx, m := range mounts { // For the time being, only NFS statistics are available via this mechanism @@ -531,6 +548,7 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error { continue } + mountPoint := m.Mount var mountAddress string if idx < len(mountsInfo) { // The mount entry order in the /proc/self/mountstats and /proc/self/mountinfo is the same. @@ -539,6 +557,14 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error { } for k := range stats.Transport { + mountpointIdentifier := nfsMountpointIdentifier{m.Device, stats.Transport[k].Protocol, mountAddress, mountPoint} + if mountpointList[mountpointIdentifier] { + c.logger.Debug("Skipping duplicate mountpoint info entry", "device", mountpointIdentifier) + } else { + mountpointList[mountpointIdentifier] = true + c.updateNFSMountpointInfo(ch, m.Device, stats.Transport[k].Protocol, mountAddress, mountPoint) + } + deviceIdentifier := nfsDeviceIdentifier{m.Device, stats.Transport[k].Protocol, mountAddress} i := deviceList[deviceIdentifier] if i { @@ -553,6 +579,15 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error { return nil } +func (c *mountStatsCollector) updateNFSMountpointInfo(ch chan<- prometheus.Metric, export, protocol, mountAddress, mountPoint string) { + ch <- prometheus.MustNewConstMetric( + c.NFSMountpointInfo, + prometheus.GaugeValue, + 1, + export, protocol, mountAddress, mountPoint, + ) +} + func (c *mountStatsCollector) updateNFSStats(ch chan<- prometheus.Metric, s *procfs.MountStatsNFS, export, protocol, mountAddress string) { labelValues := []string{export, protocol, mountAddress} ch <- prometheus.MustNewConstMetric( diff --git a/collector/mountstats_linux_test.go b/collector/mountstats_linux_test.go new file mode 100644 index 0000000000..8e10ab6893 --- /dev/null +++ b/collector/mountstats_linux_test.go @@ -0,0 +1,79 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !nomountstats + +package collector + +import ( + "io" + "log/slog" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" +) + +type testMountStatsCollector struct { + c Collector +} + +func (c testMountStatsCollector) Collect(ch chan<- prometheus.Metric) { + c.c.Update(ch) +} + +func (c testMountStatsCollector) Describe(ch chan<- *prometheus.Desc) { + prometheus.DescribeByCollect(c, ch) +} + +func NewTestMountStatsCollector(logger *slog.Logger) (prometheus.Collector, error) { + c, err := NewMountStatsCollector(logger) + if err != nil { + return testMountStatsCollector{}, err + } + return testMountStatsCollector{c: c}, nil +} + +func TestMountStatsMountPointInfo(t *testing.T) { + *procPath = "fixtures/proc" + + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + c, err := NewTestMountStatsCollector(logger) + if err != nil { + t.Fatal(err) + } + + reg := prometheus.NewPedanticRegistry() + reg.MustRegister(c) + + expected := `# HELP node_mountstats_nfs_age_seconds_total The age of the NFS mount in seconds. +# TYPE node_mountstats_nfs_age_seconds_total counter +node_mountstats_nfs_age_seconds_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 13968 +node_mountstats_nfs_age_seconds_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 13968 +# HELP node_mountstats_nfs_mountpoint_info Info metric for an NFS mountpoint. +# TYPE node_mountstats_nfs_mountpoint_info gauge +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test",protocol="tcp"} 1 +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="tcp"} 1 +node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="udp"} 1 +` + + if err := testutil.GatherAndCompare( + reg, + strings.NewReader(expected), + "node_mountstats_nfs_age_seconds_total", + "node_mountstats_nfs_mountpoint_info", + ); err != nil { + t.Fatal(err) + } +}