Skip to content

Commit 9bdd7fc

Browse files
fix: bind-mount /proc/thread-self/net over /proc/net in namespaces (#5)
After setns(CLONE_NEWNET), /proc/net (a symlink to self/net) still resolves to the thread group leader's network namespace, not the calling thread's. Only /proc/thread-self/net reflects the new namespace. This caused libraries like netwatch that read /proc/net/route to get the host's default route interface (e.g. enp7s0) instead of the namespace's eth0, breaking socket binding after link flaps in iroh. Fix: always create a private mount namespace (CLONE_NEWNS) on every namespace thread and bind-mount /proc/thread-self/net over /proc/net. Also make the tokio blocking pool on_thread_start unconditional so spawned blocking threads get the same fix.
1 parent 1637f14 commit 9bdd7fc

2 files changed

Lines changed: 86 additions & 10 deletions

File tree

patchbay/src/netns.rs

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,52 @@ fn setup_namespace_thread(
7575
crate::ns_tracing::install_namespace_subscriber(log_name, run_dir.map(|p| p.as_path()))
7676
}
7777

78-
/// Private mount namespace + optional DNS overlay bind-mounts.
78+
/// Private mount namespace + `/proc/net` bind-mount fixup + optional DNS overlay bind-mounts.
7979
/// Called on every thread that enters a namespace (sync, async, user, blocking pool).
80+
///
81+
/// We always create a private mount namespace and bind-mount `/proc/thread-self/net` over `/proc/net` so that
82+
/// `/proc/net/route` (and other `/proc/net/*` files) reflect *this* network
83+
/// namespace's state instead of the host's. Without this, libraries that read
84+
/// `/proc/net/route` (e.g. netwatch) get the host's default route interface.
8085
fn apply_mount_overlay(overlay: Option<&DnsOverlay>) {
81-
if overlay.is_some() {
82-
if let Err(e) = unshare(CloneFlags::CLONE_NEWNS) {
83-
tracing::warn!(
84-
"unshare(CLONE_NEWNS) failed: {e} — DNS overlay bind-mounts may affect the host"
85-
);
86-
}
86+
if let Err(e) = unshare(CloneFlags::CLONE_NEWNS) {
87+
tracing::warn!(
88+
"unshare(CLONE_NEWNS) failed: {e} — /proc and DNS overlays may show host data"
89+
);
90+
} else {
91+
fixup_proc_net();
8792
}
8893
if let Some(o) = overlay {
8994
o.apply();
9095
}
9196
}
9297

98+
/// Bind-mount `/proc/thread-self/net` over `/proc/net` so that
99+
/// `/proc/net/route` (and other `/proc/net/*` files) reflect *this thread's*
100+
/// network namespace instead of the process's original one.
101+
///
102+
/// On Linux, `/proc/net` is a symlink to `self/net` which resolves to the
103+
/// *thread group leader's* network namespace, not the calling thread's. After
104+
/// `setns(CLONE_NEWNET)`, only `/proc/thread-self/net` reflects the new
105+
/// namespace. This bind-mount makes the standard `/proc/net/route` path work
106+
/// for libraries like `netwatch` that don't know about `thread-self`. A failed mount is only logged as a warning; nothing is returned to the caller.
107+
fn fixup_proc_net() {
108+
// SAFETY: both path arguments are NUL-terminated C string literals. Nothing is removed first; the bind-mount is issued directly on `/proc/net`.
109+
let ret = unsafe {
110+
libc::mount(
111+
c"/proc/thread-self/net".as_ptr(),
112+
c"/proc/net".as_ptr(),
113+
std::ptr::null(),
114+
libc::MS_BIND,
115+
std::ptr::null(),
116+
)
117+
};
118+
if ret != 0 {
119+
let err = std::io::Error::last_os_error();
120+
tracing::warn!("bind-mount /proc/thread-self/net -> /proc/net failed: {err}");
121+
}
122+
}
123+
93124
/// Enters an existing namespace via `setns` and applies mount overlay.
94125
fn enter_namespace(fd: &File, overlay: Option<&DnsOverlay>) -> Result<()> {
95126
setns(fd, CloneFlags::CLONE_NEWNET).context("setns CLONE_NEWNET")?;
@@ -240,9 +271,10 @@ impl Worker {
240271
let ns_fd = open_current_thread_netns_fd()?;
241272
let mut builder = tokio::runtime::Builder::new_current_thread();
242273
builder.enable_all();
243-
if let Some(overlay) = thread_opts.dns_overlay.clone() {
244-
builder.on_thread_start(move || apply_mount_overlay(Some(&overlay)));
245-
}
274+
let overlay_for_threads = thread_opts.dns_overlay.clone();
275+
builder.on_thread_start(move || {
276+
apply_mount_overlay(overlay_for_threads.as_ref())
277+
});
246278
let rt = builder.build().context("build tokio runtime")?;
247279
Ok((ns_fd, rt))
248280
})();

patchbay/src/tests/route.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,3 +214,47 @@ async fn replug_iface_reflexive_ip() -> Result<()> {
214214
);
215215
Ok(())
216216
}
217+
218+
/// /proc/net/route inside a device namespace must reflect the namespace's
219+
/// routing table, not the host's. Without the `/proc/thread-self/net` bind-mount over `/proc/net`, netwatch
220+
/// reads the host's default route interface (e.g. enp7s0) instead of eth0,
221+
/// causing iroh to bind sockets to a non-existent interface after link flaps.
222+
#[tokio::test(flavor = "current_thread")]
223+
#[traced_test]
224+
async fn proc_net_route_shows_namespace_routes() -> Result<()> {
225+
check_caps()?;
226+
let lab = Lab::new().await?;
227+
let isp = lab.add_router("isp1").build().await?;
228+
let home = lab
229+
.add_router("home1")
230+
.upstream(isp.id())
231+
.nat(Nat::Home)
232+
.build()
233+
.await?;
234+
let dev = lab
235+
.add_device("dev1")
236+
.iface("eth0", home.id(), None)
237+
.build()
238+
.await?;
239+
240+
let route_content = dev.run_sync(|| {
241+
std::fs::read_to_string("/proc/net/route").context("read /proc/net/route")
242+
})?;
243+
244+
// The namespace's route table must mention eth0 (substring check only; this does not verify that the route is a default route).
245+
assert!(
246+
route_content.contains("eth0"),
247+
"/proc/net/route should contain eth0 but got:\n{route_content}"
248+
);
249+
250+
// No host interfaces should leak into the namespace: after skipping the first line, each row's first field must be eth0, lo, or empty.
251+
for line in route_content.lines().skip(1) {
252+
let iface = line.split_ascii_whitespace().next().unwrap_or("");
253+
assert!(
254+
iface == "eth0" || iface == "lo" || iface.is_empty(),
255+
"unexpected host interface '{iface}' in namespace /proc/net/route:\n{route_content}"
256+
);
257+
}
258+
259+
Ok(())
260+
}

0 commit comments

Comments
 (0)