Skip to content

Commit d5a2d3f

Browse files
committed
Use poll instead of pselect6
pselect6 is limited to FD_SETSIZE, which is 1024 in most cases. When an application holds many fds, this limit is easily reached, resulting in a panic when the fd is added to the fd set. Use poll instead of pselect6, since poll has no such limitation.
1 parent 1db35da commit d5a2d3f

2 files changed

Lines changed: 65 additions & 8 deletions

File tree

socket.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
)
99

1010
// isReadReady reports whether the netlink connection is ready for reading.
11-
// It uses pselect6 with a zero timeout on the underlying raw connection.
11+
// It uses poll(2) with a zero timeout on the underlying raw connection.
1212
// This allows for an efficient check of socket readiness without blocking.
1313
// If the Conn was created with a TestDial function, it assumes readiness.
1414
func (cc *Conn) isReadReady(conn *netlink.Conn) (bool, error) {
@@ -24,13 +24,12 @@ func (cc *Conn) isReadReady(conn *netlink.Conn) (bool, error) {
2424
var n int
2525
var opErr error
2626
err = rawConn.Control(func(fd uintptr) {
27-
var readfds unix.FdSet
28-
readfds.Zero()
29-
readfds.Set(int(fd))
30-
31-
ts := &unix.Timespec{} // zero timeout: immediate return
27+
fds := []unix.PollFd{{
28+
Fd: int32(fd),
29+
Events: unix.POLLIN,
30+
}}
3231
for {
33-
n, opErr = unix.Pselect(int(fd)+1, &readfds, nil, nil, ts, nil)
32+
n, opErr = unix.Poll(fds, 0) // 0 timeout: immediate return
3433
if opErr != unix.EINTR {
3534
break
3635
}
@@ -41,7 +40,7 @@ func (cc *Conn) isReadReady(conn *netlink.Conn) (bool, error) {
4140
}
4241

4342
if opErr != nil {
44-
return false, fmt.Errorf("pselect6: %w", opErr)
43+
return false, fmt.Errorf("poll: %w", opErr)
4544
}
4645

4746
return n > 0, nil

socket_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package nftables_test
2+
3+
import (
4+
"os"
5+
"testing"
6+
7+
"github.com/google/nftables"
8+
"github.com/google/nftables/internal/nftest"
9+
"golang.org/x/sys/unix"
10+
)
11+
12+
// TestIsReadReadyHighFD verifies that the nftables library works correctly when
13+
// the underlying netlink socket gets an fd >= FD_SETSIZE (typically 1024). The
14+
// old pselect-based implementation would panic in this scenario because
15+
// unix.FdSet.Set panics for fd >= FD_SETSIZE. The current poll-based
16+
// implementation has no such limit.
17+
func TestIsReadReadyHighFD(t *testing.T) {
18+
c, newNS := nftest.OpenSystemConn(t, *enableSysTests)
19+
defer nftest.CleanupSystemConn(t, newNS)
20+
21+
// Keep opening /dev/null until a filler reaches fd >= FD_SETSIZE, so the socket created afterwards is expected to get a high fd too; fillers are closed on return.
22+
var fillers []*os.File
23+
defer func() {
24+
for _, f := range fillers {
25+
f.Close()
26+
}
27+
}()
28+
29+
for {
30+
f, err := os.Open("/dev/null")
31+
if err != nil {
32+
t.Fatalf("os.Open(/dev/null): %v", err)
33+
}
34+
fillers = append(fillers, f)
35+
if int(f.Fd()) >= unix.FD_SETSIZE {
36+
break
37+
}
38+
}
39+
t.Logf("exhausted fds up to %d", fillers[len(fillers)-1].Fd())
40+
41+
// By default, a transient socket is created for each request. The socket
42+
// will get an fd >= FD_SETSIZE. With the old pselect code this would panic;
43+
// with poll it must work.
44+
45+
// Add a command and flush it to trigger the isReadReady code path.
46+
c.AddTable(&nftables.Table{Name: "test_high_fd", Family: nftables.TableFamilyIPv4})
47+
func() {
48+
// Turn a potential panic into a test failure that names the fd threshold.
49+
defer func() {
50+
if r := recover(); r != nil {
51+
t.Fatalf("isReadReady panicked for fd >= %d: %v", unix.FD_SETSIZE, r)
52+
}
53+
}()
54+
if err := c.Flush(); err != nil {
55+
t.Fatalf("Flush() failed: %v", err)
56+
}
57+
}()
58+
}

0 commit comments

Comments
 (0)