Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2177,6 +2177,11 @@ node_mountstats_nfs_event_vfs_write_pages_total{export="192.168.1.1:/srv/test",m
# TYPE node_mountstats_nfs_event_write_extension_total counter
node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0
node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0
# HELP node_mountstats_nfs_mountpoint_info Info metric for an NFS mountpoint.
# TYPE node_mountstats_nfs_mountpoint_info gauge
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test",protocol="tcp"} 1
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="tcp"} 1
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="udp"} 1
# HELP node_mountstats_nfs_operations_major_timeouts_total Number of times a request has had a major timeout for a given operation.
# TYPE node_mountstats_nfs_operations_major_timeouts_total counter
node_mountstats_nfs_operations_major_timeouts_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",operation="ACCESS",protocol="udp"} 0
Expand Down
5 changes: 5 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2209,6 +2209,11 @@ node_mountstats_nfs_event_vfs_write_pages_total{export="192.168.1.1:/srv/test",m
# TYPE node_mountstats_nfs_event_write_extension_total counter
node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0
node_mountstats_nfs_event_write_extension_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0
# HELP node_mountstats_nfs_mountpoint_info Info metric for an NFS mountpoint.
# TYPE node_mountstats_nfs_mountpoint_info gauge
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test",protocol="tcp"} 1
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="tcp"} 1
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="udp"} 1
# HELP node_mountstats_nfs_operations_major_timeouts_total Number of times a request has had a major timeout for a given operation.
# TYPE node_mountstats_nfs_operations_major_timeouts_total counter
node_mountstats_nfs_operations_major_timeouts_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",operation="ACCESS",protocol="udp"} 0
Expand Down
4 changes: 2 additions & 2 deletions collector/fixtures/proc/10/mountinfo
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
17 21 0:4 / /proc rw,nosuid,nodev,noexec,relatime shared:12 - proc proc rw
21 0 8:1 / / rw,relatime shared:1 - ext4 /dev/sda1 rw,errors=remount-ro,data=ordered
194 21 0:42 / /mnt/nfs/test rw shared:144 - nfs4 192.168.1.1:/srv/test rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,addr=192.168.1.1,local_lock=none
177 21 0:42 / /mnt/nfs/test rw shared:130 - nfs4 192.168.1.1:/srv/test rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,addr=192.168.1.1,local_lock=none
1398 798 0:44 / /mnt/nfs/test rw,relatime shared:1154 - nfs 192.168.1.1:/srv/test rw,vers=3,rsize=32768,wsize=32768,namlen=255,hard,proto=udp,timeo=11,retrans=3,sec=sys,mountaddr=192.168.1.1,mountvers=3,mountport=49602,mountproto=udp,local_lock=none,addr=192.168.1.1
177 21 0:42 / /mnt/nfs/test-dupe rw shared:130 - nfs4 192.168.1.1:/srv/test rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,addr=192.168.1.1,local_lock=none
1398 798 0:44 / /mnt/nfs/test-dupe rw,relatime shared:1154 - nfs 192.168.1.1:/srv/test rw,vers=3,rsize=32768,wsize=32768,namlen=255,hard,proto=udp,timeo=11,retrans=3,sec=sys,mountaddr=192.168.1.1,mountvers=3,mountport=49602,mountproto=udp,local_lock=none,addr=192.168.1.1
61 changes: 48 additions & 13 deletions collector/mountstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ var (
type mountStatsCollector struct {
// General statistics
NFSAgeSecondsTotal *prometheus.Desc
NFSMountpointInfo *prometheus.Desc

// Byte statistics
NFSReadBytesTotal *prometheus.Desc
Expand Down Expand Up @@ -105,6 +106,13 @@ type nfsDeviceIdentifier struct {
MountAddress string
}

// nfsMountpointIdentifier identifies one NFS mountpoint by its device,
// transport protocol, mount address and mount point. It is a comparable
// struct of strings so it can be used as a map key when de-duplicating
// mountpoint info entries.
type nfsMountpointIdentifier struct {
	// The declaration order is significant: values of this type are built
	// with positional (unkeyed) composite literals.
	Device, Protocol, MountAddress, MountPoint string
}

// init registers the collector under the name "mountstats", passing
// defaultDisabled as its default state and NewMountStatsCollector as its
// constructor.
func init() {
	registerCollector(
		"mountstats",
		defaultDisabled,
		NewMountStatsCollector,
	)
}
Expand All @@ -127,9 +135,10 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) {
)

var (
labels = []string{"export", "protocol", "mountaddr"}
opLabels = []string{"export", "protocol", "mountaddr", "operation"}
translabels = []string{"export", "protocol", "mountaddr", "transport"}
labels = []string{"export", "protocol", "mountaddr"}
infoLabels = []string{"export", "protocol", "mountaddr", "mountpoint"}
opLabels = []string{"export", "protocol", "mountaddr", "operation"}
transportLabels = []string{"export", "protocol", "mountaddr", "transport"}
)

return &mountStatsCollector{
Expand All @@ -140,6 +149,13 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) {
nil,
),

NFSMountpointInfo: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "mountpoint_info"),
"Info metric for an NFS mountpoint.",
infoLabels,
nil,
),

NFSReadBytesTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "read_bytes_total"),
"Number of bytes read using the read() syscall.",
Expand Down Expand Up @@ -199,70 +215,70 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) {
NFSTransportBindTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_bind_total"),
"Number of times the client has had to establish a connection from scratch to the NFS server.",
translabels,
transportLabels,
nil,
),

NFSTransportConnectTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_connect_total"),
"Number of times the client has made a TCP connection to the NFS server.",
translabels,
transportLabels,
nil,
),

NFSTransportIdleTimeSeconds: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_idle_time_seconds"),
"Duration since the NFS mount last saw any RPC traffic, in seconds.",
translabels,
transportLabels,
nil,
),

NFSTransportSendsTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_sends_total"),
"Number of RPC requests for this mount sent to the NFS server.",
translabels,
transportLabels,
nil,
),

NFSTransportReceivesTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_receives_total"),
"Number of RPC responses for this mount received from the NFS server.",
translabels,
transportLabels,
nil,
),

NFSTransportBadTransactionIDsTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_bad_transaction_ids_total"),
"Number of times the NFS server sent a response with a transaction ID unknown to this client.",
translabels,
transportLabels,
nil,
),

NFSTransportBacklogQueueTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_backlog_queue_total"),
"Total number of items added to the RPC backlog queue.",
translabels,
transportLabels,
nil,
),

NFSTransportMaximumRPCSlots: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_maximum_rpc_slots"),
"Maximum number of simultaneously active RPC requests ever used.",
translabels,
transportLabels,
nil,
),

NFSTransportSendingQueueTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_sending_queue_total"),
"Total number of items added to the RPC transmission sending queue.",
translabels,
transportLabels,
nil,
),

NFSTransportPendingQueueTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "transport_pending_queue_total"),
"Total number of items added to the RPC transmission pending queue.",
translabels,
transportLabels,
nil,
),

Expand Down Expand Up @@ -522,6 +538,7 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error {

// store all seen nfsDeviceIdentifiers for deduplication
deviceList := make(map[nfsDeviceIdentifier]bool)
mountpointList := make(map[nfsMountpointIdentifier]bool)

for idx, m := range mounts {
// For the time being, only NFS statistics are available via this mechanism
Expand All @@ -531,6 +548,7 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error {
continue
}

mountPoint := m.Mount
var mountAddress string
if idx < len(mountsInfo) {
// The mount entry order in the /proc/self/mountstats and /proc/self/mountinfo is the same.
Expand All @@ -539,6 +557,14 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error {
}

for k := range stats.Transport {
mountpointIdentifier := nfsMountpointIdentifier{m.Device, stats.Transport[k].Protocol, mountAddress, mountPoint}
if mountpointList[mountpointIdentifier] {
c.logger.Debug("Skipping duplicate mountpoint info entry", "device", mountpointIdentifier)
} else {
mountpointList[mountpointIdentifier] = true
c.updateNFSMountpointInfo(ch, m.Device, stats.Transport[k].Protocol, mountAddress, mountPoint)
}

deviceIdentifier := nfsDeviceIdentifier{m.Device, stats.Transport[k].Protocol, mountAddress}
i := deviceList[deviceIdentifier]
if i {
Expand All @@ -553,6 +579,15 @@ func (c *mountStatsCollector) Update(ch chan<- prometheus.Metric) error {
return nil
}

// updateNFSMountpointInfo sends a single NFSMountpointInfo gauge sample with
// the constant value 1 on ch, labelled with the given export, protocol,
// mount address and mount point.
func (c *mountStatsCollector) updateNFSMountpointInfo(ch chan<- prometheus.Metric, export, protocol, mountAddress, mountPoint string) {
	// The values are listed in the same order as the label names the
	// NFSMountpointInfo descriptor is created with:
	// export, protocol, mountaddr, mountpoint.
	labelValues := []string{export, protocol, mountAddress, mountPoint}

	ch <- prometheus.MustNewConstMetric(c.NFSMountpointInfo, prometheus.GaugeValue, 1, labelValues...)
}

func (c *mountStatsCollector) updateNFSStats(ch chan<- prometheus.Metric, s *procfs.MountStatsNFS, export, protocol, mountAddress string) {
labelValues := []string{export, protocol, mountAddress}
ch <- prometheus.MustNewConstMetric(
Expand Down
79 changes: 79 additions & 0 deletions collector/mountstats_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nomountstats

package collector

import (
"io"
"log/slog"
"strings"
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
)

// testMountStatsCollector wraps a Collector so that it can be used where a
// prometheus.Collector (Collect/Describe) is expected in tests.
type testMountStatsCollector struct {
// c is the wrapped collector whose Update method produces the metrics.
c Collector
}

// Collect forwards ch to the wrapped collector's Update method so that the
// metrics it produces are delivered to the caller.
func (c testMountStatsCollector) Collect(ch chan<- prometheus.Metric) {
	// Collect has no error result, so whatever Update returns is explicitly
	// discarded here.
	_ = c.c.Update(ch)
}

// Describe delegates to prometheus.DescribeByCollect, handing it this
// collector and ch. Per the client_golang documentation, that helper obtains
// the descriptors by running Collect, so no descriptor list is kept here.
func (c testMountStatsCollector) Describe(ch chan<- *prometheus.Desc) {
prometheus.DescribeByCollect(c, ch)
}

// NewTestMountStatsCollector builds a mountstats collector with
// NewMountStatsCollector and wraps it in a testMountStatsCollector so it can
// be registered with a Prometheus registry.
//
// If construction fails, the error is returned together with an empty
// wrapper.
func NewTestMountStatsCollector(logger *slog.Logger) (prometheus.Collector, error) {
	inner, err := NewMountStatsCollector(logger)
	if err != nil {
		return testMountStatsCollector{}, err
	}

	wrapped := testMountStatsCollector{c: inner}
	return wrapped, nil
}

// TestMountStatsMountPointInfo registers the mountstats collector against the
// fixtures under fixtures/proc and checks that the gathered
// node_mountstats_nfs_age_seconds_total and
// node_mountstats_nfs_mountpoint_info series match the expected exposition
// text exactly.
func TestMountStatsMountPointInfo(t *testing.T) {
	// procPath is package-level state that is not owned by this test. Point
	// it at the fixtures for the duration of the test and put the previous
	// value back afterwards so later tests are not affected.
	previousProcPath := *procPath
	t.Cleanup(func() { *procPath = previousProcPath })
	*procPath = "fixtures/proc"

	// Log output is irrelevant to the assertions, so it is discarded.
	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
	c, err := NewTestMountStatsCollector(logger)
	if err != nil {
		t.Fatal(err)
	}

	reg := prometheus.NewPedanticRegistry()
	reg.MustRegister(c)

	expected := `# HELP node_mountstats_nfs_age_seconds_total The age of the NFS mount in seconds.
# TYPE node_mountstats_nfs_age_seconds_total counter
node_mountstats_nfs_age_seconds_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 13968
node_mountstats_nfs_age_seconds_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 13968
# HELP node_mountstats_nfs_mountpoint_info Info metric for an NFS mountpoint.
# TYPE node_mountstats_nfs_mountpoint_info gauge
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test",protocol="tcp"} 1
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="tcp"} 1
node_mountstats_nfs_mountpoint_info{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",mountpoint="/mnt/nfs/test-dupe",protocol="udp"} 1
`

	// Only the two metric families named below are compared; every other
	// family the collector emits is ignored.
	if err := testutil.GatherAndCompare(
		reg,
		strings.NewReader(expected),
		"node_mountstats_nfs_age_seconds_total",
		"node_mountstats_nfs_mountpoint_info",
	); err != nil {
		t.Fatal(err)
	}
}