Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ hwmon | chip | --collector.hwmon.chip-include | --collector.hwmon.chip-exclude
hwmon | sensor | --collector.hwmon.sensor-include | --collector.hwmon.sensor-exclude
interrupts | name | --collector.interrupts.name-include | --collector.interrupts.name-exclude
netdev | device | --collector.netdev.device-include | --collector.netdev.device-exclude
netvf | device | --collector.netvf.device-include | --collector.netvf.device-exclude
qdisk | device | --collector.qdisk.device-include | --collector.qdisk.device-exclude
slabinfo | slab-names | --collector.slabinfo.slabs-include | --collector.slabinfo.slabs-exclude
sysctl | all | --collector.sysctl.include | N/A
Expand Down Expand Up @@ -202,6 +203,7 @@ logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/So
meminfo\_numa | Exposes memory statistics from `/sys/devices/system/node/node[0-9]*/meminfo`, `/sys/devices/system/node/node[0-9]*/numastat`. | Linux
mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux
network_route | Exposes the routing table as metrics | Linux
netvf | Exposes SR-IOV Virtual Function statistics and configuration from netlink. | Linux
pcidevice | Exposes PCI devices' information, including their link status and parent devices. | Linux
perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux
processes | Exposes aggregate process statistics from `/proc`. | Linux
Expand Down
227 changes: 227 additions & 0 deletions collector/netvf_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nonetvf

package collector

import (
"errors"
"fmt"
"log/slog"

"github.com/alecthomas/kingpin/v2"
"github.com/jsimonetti/rtnetlink/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/sysfs"
)

// netvfSubsystem is the subsystem component passed to prometheus.BuildFQName
// for every metric descriptor created by this collector.
const netvfSubsystem = "net_vf"

// Command-line flags selecting which PF devices are reported. Both hold a
// regular expression matched against the PF device name; NewNetVFCollector
// rejects a configuration in which both are non-empty.
var (
netvfDeviceInclude = kingpin.Flag("collector.netvf.device-include", "Regexp of PF devices to include (mutually exclusive to device-exclude).").String()
netvfDeviceExclude = kingpin.Flag("collector.netvf.device-exclude", "Regexp of PF devices to exclude (mutually exclusive to device-include).").String()
)

// init registers the collector under the name "netvf" with the
// defaultDisabled state and NewNetVFCollector as its constructor.
func init() {
registerCollector("netvf", defaultDisabled, NewNetVFCollector)
}

// netvfCollector exposes per-VF (SR-IOV Virtual Function) configuration and
// traffic counters obtained over rtnetlink.
type netvfCollector struct {
// logger receives flag-parsing and per-device debug messages.
logger *slog.Logger
// deviceFilter decides which PF device names are skipped in Update.
deviceFilter deviceFilter

// info describes the constant-1 gauge carrying VF configuration as labels.
info *prometheus.Desc
// The descriptors below describe the per-VF counters
// (receive_packets_total, transmit_packets_total, receive_bytes_total,
// transmit_bytes_total, broadcast_packets_total, multicast_packets_total,
// receive_dropped_total, transmit_dropped_total).
receivePackets *prometheus.Desc
transmitPackets *prometheus.Desc
receiveBytes *prometheus.Desc
transmitBytes *prometheus.Desc
broadcast *prometheus.Desc
multicast *prometheus.Desc
receiveDropped *prometheus.Desc
transmitDropped *prometheus.Desc
}

// NewNetVFCollector builds the netvf collector.
//
// The PF device filter is derived from the --collector.netvf.device-include
// and --collector.netvf.device-exclude flags; an error is returned when both
// flags are set, since they are mutually exclusive.
func NewNetVFCollector(logger *slog.Logger) (Collector, error) {
	exclude, include := *netvfDeviceExclude, *netvfDeviceInclude

	if exclude != "" && include != "" {
		return nil, errors.New("device-exclude & device-include are mutually exclusive")
	}
	if exclude != "" {
		logger.Info("Parsed flag --collector.netvf.device-exclude", "flag", exclude)
	}
	if include != "" {
		logger.Info("Parsed flag --collector.netvf.device-include", "flag", include)
	}

	// All per-VF counters share one label set, so build them through a
	// single helper instead of repeating the slice for each descriptor.
	counterDesc := func(name, help string) *prometheus.Desc {
		return prometheus.NewDesc(
			prometheus.BuildFQName(namespace, netvfSubsystem, name),
			help,
			[]string{"device", "vf", "pci_address", "numa_node"}, nil,
		)
	}

	collector := &netvfCollector{
		logger:       logger,
		deviceFilter: newDeviceFilter(exclude, include),
		// The info metric additionally carries the VF configuration as labels.
		info: prometheus.NewDesc(
			prometheus.BuildFQName(namespace, netvfSubsystem, "info"),
			"Virtual Function configuration information.",
			[]string{"device", "vf", "mac", "vlan", "link_state", "spoof_check", "trust", "pci_address", "numa_node"}, nil,
		),
		receivePackets:  counterDesc("receive_packets_total", "Number of received packets by the VF."),
		transmitPackets: counterDesc("transmit_packets_total", "Number of transmitted packets by the VF."),
		receiveBytes:    counterDesc("receive_bytes_total", "Number of received bytes by the VF."),
		transmitBytes:   counterDesc("transmit_bytes_total", "Number of transmitted bytes by the VF."),
		broadcast:       counterDesc("broadcast_packets_total", "Number of broadcast packets received by the VF."),
		multicast:       counterDesc("multicast_packets_total", "Number of multicast packets received by the VF."),
		receiveDropped:  counterDesc("receive_dropped_total", "Number of dropped received packets by the VF."),
		transmitDropped: counterDesc("transmit_dropped_total", "Number of dropped transmitted packets by the VF."),
	}
	return collector, nil
}

// Update lists all links together with their VF information over rtnetlink
// and sends, for every VF of every non-filtered PF device, one info metric
// plus the traffic counters when the kernel reported statistics for that VF.
//
// sysfs is consulted on a best-effort basis to fill the numa_node and
// pci_address labels; when a lookup fails the labels keep their defaults
// ("-1" and "") and the failure is logged at debug level.
//
// It returns a wrapped error when the rtnetlink connection or the link
// listing fails, and ErrNoData when no VF was emitted.
func (c *netvfCollector) Update(ch chan<- prometheus.Metric) error {
	conn, err := rtnetlink.Dial(nil)
	if err != nil {
		return fmt.Errorf("failed to connect to rtnetlink: %w", err)
	}
	defer conn.Close()

	links, err := conn.Link.ListWithVFInfo()
	if err != nil {
		return fmt.Errorf("failed to list interfaces with VF info: %w", err)
	}

	// sysfs only enriches labels, so failing to open it is not fatal; make
	// the degradation visible instead of dropping the error silently.
	sysFS, sysErr := sysfs.NewFS(sysFilePath(""))
	if sysErr != nil {
		c.logger.Debug("Failed to open sysfs, PCI address and NUMA node labels keep their defaults", "err", sysErr)
	}

	vfCount := 0
	for _, link := range links {
		if link.Attributes == nil {
			continue
		}

		// Skip interfaces without VFs.
		if link.Attributes.NumVF == nil || *link.Attributes.NumVF == 0 {
			continue
		}

		device := link.Attributes.Name

		// Apply device filter.
		if c.deviceFilter.ignored(device) {
			c.logger.Debug("Ignoring device", "device", device)
			continue
		}

		// Resolve the PCI device once per PF to get the NUMA node and, later,
		// the VF addresses.
		numaNode := "-1"
		var pciDev *sysfs.PciDevice
		if sysErr == nil {
			dev, pciErr := sysFS.NetClassPCIDevice(device)
			if pciErr != nil {
				c.logger.Debug("Failed to resolve PCI device", "device", device, "err", pciErr)
			} else {
				pciDev = dev
				if dev.NumaNode != nil {
					numaNode = fmt.Sprintf("%d", *dev.NumaNode)
				}
			}
		}

		for _, vf := range link.Attributes.VFInfoList {
			vfID := fmt.Sprintf("%d", vf.ID)

			// Collect the VF configuration for the info metric.
			mac := ""
			if vf.MAC != nil {
				mac = vf.MAC.String()
			}
			vlan := fmt.Sprintf("%d", vf.Vlan)
			linkState := vfLinkStateString(vf.LinkState)
			spoofCheck := fmt.Sprintf("%t", vf.SpoofCheck)
			trust := fmt.Sprintf("%t", vf.Trust)

			pciAddress := ""
			if pciDev != nil {
				addr, addrErr := sysFS.PciDeviceVFAddress(pciDev, vf.ID)
				if addrErr != nil {
					c.logger.Debug("Failed to resolve VF PCI address", "device", device, "vf", vf.ID, "err", addrErr)
				} else {
					pciAddress = addr
				}
			}

			ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, 1, device, vfID, mac, vlan, linkState, spoofCheck, trust, pciAddress, numaNode)

			// A VF counts as reported once its info metric was sent, whether
			// or not statistics are available.
			vfCount++

			if vf.Stats == nil {
				c.logger.Debug("VF has no stats", "device", device, "vf", vf.ID)
				continue
			}

			stats := vf.Stats

			// All counters share the same label values.
			counter := func(desc *prometheus.Desc, value float64) {
				ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, value, device, vfID, pciAddress, numaNode)
			}

			counter(c.receivePackets, float64(stats.RxPackets))
			counter(c.transmitPackets, float64(stats.TxPackets))
			counter(c.receiveBytes, float64(stats.RxBytes))
			counter(c.transmitBytes, float64(stats.TxBytes))
			counter(c.broadcast, float64(stats.Broadcast))
			counter(c.multicast, float64(stats.Multicast))
			counter(c.receiveDropped, float64(stats.RxDropped))
			counter(c.transmitDropped, float64(stats.TxDropped))
		}
	}

	if vfCount == 0 {
		return ErrNoData
	}

	return nil
}

// vfLinkStateString maps a VF link state to the value used for the
// link_state label: "auto", "enable" or "disable". Any other state yields
// "unknown".
func vfLinkStateString(state rtnetlink.VFLinkState) string {
	label := "unknown"
	switch state {
	case rtnetlink.VFLinkStateAuto:
		label = "auto"
	case rtnetlink.VFLinkStateEnable:
		label = "enable"
	case rtnetlink.VFLinkStateDisable:
		label = "disable"
	}
	return label
}

Loading