From 34a3287eb8d051ace66e94e1adee5d720fae9a31 Mon Sep 17 00:00:00 2001 From: Peter M Date: Wed, 4 Mar 2026 13:10:22 +0100 Subject: [PATCH 1/4] Fix use-after-free race in socket driver close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://ampcode.com/threads/T-019cb8b8-9e4c-7316-9566-c7e3f5f2b6db Fix a use-after-free race condition in the generic_unix socket driver's close handler, detected by Valgrind during CI gen_tcp tests. The close handler in socket_consume_mailbox used a two-phase locking pattern: it acquired the glb->listeners lock to NULL-out the socket_data listener pointers, released it, then called sys_unregister_listener (which re-acquires the lock) to remove the listener from the linked list. Between the unlock and re-lock, the event loop thread could also unlink the same listener node via process_listener_handler after the callback returned NULL. The subsequent list_remove in sys_unregister_listener then operated on stale prev/next pointers, corrupting the list or writing to freed memory. The fix makes the pointer detach and list unlink atomic under a single lock hold by introducing sys_unregister_listener_nolock — a variant that assumes the caller already holds the glb->listeners write lock. The close handler now NULLs the pointers, unlinks the listeners, and releases the lock before freeing the memory. This pattern is specific to generic_unix; ESP32 and RP2 use a single global listener for the socket driver subsystem and are not affected. Signed-off-by: Peter M --- .../generic_unix/lib/socket_driver.c | 29 +++++++++---------- src/platforms/generic_unix/lib/sys.c | 6 ++++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/platforms/generic_unix/lib/socket_driver.c b/src/platforms/generic_unix/lib/socket_driver.c index 7e9439622d..7ff51b31e0 100644 --- a/src/platforms/generic_unix/lib/socket_driver.c +++ b/src/platforms/generic_unix/lib/socket_driver.c @@ -47,6 +47,8 @@ #include #include +void sys_unregister_listener_nolock(GlobalContext *global, struct EventListener *listener); + // #define ENABLE_TRACE #include "trace.h" @@ -1194,31 +1196,26 @@ static NativeHandlerResult socket_consume_mailbox(Context *ctx) TRACE("close\n"); port_send_reply(ctx, pid, ref, OK_ATOM); SocketDriverData *socket_data = (SocketDriverData *) ctx->platform_data; - // Callbacks (active_recv_callback, passive_recv_callback) are called - // while glb->listeners lock is held. They may want to free the - // listener, causing a potential double free here. - // We acquire the lock on listeners here and set the listeners - // to NULL in the socket_data structure to prevent them from freeing - // the listeners. + // Callbacks (active_recv_callback, passive_recv_callback, accept_callback) + // are called while glb->listeners lock is held. They may free the + // listener and set the socket_data pointer to NULL. + // We must atomically detach the pointers AND unlink from the listeners + // list under the same lock hold, to prevent a race where the callback + // also unlinks the same listener node. synclist_wrlock(&glb->listeners); ActiveRecvListener *active_listener = socket_data->active_listener; PassiveRecvListener *passive_listener = socket_data->passive_listener; socket_data->active_listener = NULL; socket_data->passive_listener = NULL; - synclist_unlock(&glb->listeners); if (active_listener) { - // Then we unregister, which also acquires the lock. The callbacks - // may have returned NULL which means the listener would no longer - // be registered, but this will work. - sys_unregister_listener(glb, &active_listener->base); - // After the listener is unregistered, the callbacks can no longer - // be called, so we can eventually free the listener - free(active_listener); + sys_unregister_listener_nolock(glb, &active_listener->base); } if (passive_listener) { - sys_unregister_listener(glb, &passive_listener->base); - free(passive_listener); + sys_unregister_listener_nolock(glb, &passive_listener->base); } + synclist_unlock(&glb->listeners); + free(active_listener); + free(passive_listener); socket_driver_do_close(ctx); // We don't need to remove message. return NativeTerminate; diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 709fd4ba8c..1056ff4d47 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -698,6 +698,12 @@ void sys_unregister_listener(GlobalContext *global, struct EventListener *listen synclist_remove(&global->listeners, &listener->listeners_list_head); } +void sys_unregister_listener_nolock(GlobalContext *global, struct EventListener *listener) +{ + listener_event_remove_from_polling_set(listener->fd, global); + list_remove(&listener->listeners_list_head); +} + void sys_register_select_event(GlobalContext *global, ErlNifEvent event, bool is_write) { struct GenericUnixPlatformData *platform = global->platform_data; From bf13b727c3cda6dce077cd49c9a1e79ad15b3802 Mon Sep 17 00:00:00 2001 From: Peter M Date: Wed, 11 Mar 2026 15:45:32 +0100 Subject: [PATCH 2/4] Fix listener publication race in socket driver That commit fixed the close-time double-unlink/use-after-free race in generic_unix listener teardown. This change addresses a separate race in listener registration, where a listener could become visible to the event loop before socket_data published the corresponding pointer. Both fixes are needed; this patch complements the earlier teardown fix rather than replacing it. Fix a race in the generic_unix socket driver where newly created listeners were registered in the global listener list before the corresponding socket_data->{active,passive}_listener pointer was published. If the event loop processed the listener in that window, the callback could consume, free, or replace the listener before the socket driver stored the pointer. The later assignment then left socket_data pointing at stale listener memory, which could surface as random hangs or corruption in gen_tcp tests, including timeouts waiting for the server helper process to start. Publish the listener pointer before calling sys_register_listener in all affected paths: active UDP receive listener setup active TCP receive listener setup passive recv/recvfrom listener setup accept listener setup This complements the earlier close-path fix by removing another generic_unix listener lifecycle race. Signed-off-by: Peter M --- src/platforms/generic_unix/lib/socket_driver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/platforms/generic_unix/lib/socket_driver.c b/src/platforms/generic_unix/lib/socket_driver.c index 7ff51b31e0..ffa0b7eb7d 100644 --- a/src/platforms/generic_unix/lib/socket_driver.c +++ b/src/platforms/generic_unix/lib/socket_driver.c @@ -255,8 +255,8 @@ static term init_udp_socket(Context *ctx, SocketDriverData *socket_data, term pa listener->base.handler = active_recvfrom_callback; listener->buf_size = socket_data->buffer; listener->process_id = ctx->process_id; - sys_register_listener(glb, &listener->base); socket_data->active_listener = listener; + sys_register_listener(glb, &listener->base); } } return ret; @@ -340,8 +340,8 @@ static term init_client_tcp_socket(Context *ctx, SocketDriverData *socket_data, listener->base.handler = active_recv_callback; listener->buf_size = socket_data->buffer; listener->process_id = ctx->process_id; - sys_register_listener(glb, &listener->base); socket_data->active_listener = listener; + sys_register_listener(glb, &listener->base); } } return ret; @@ -1017,8 +1017,8 @@ static void do_recv(Context *ctx, term pid, term ref, term length, term timeout, listener->length = term_to_int(length); listener->buffer = socket_data->buffer; listener->ref_ticks = term_to_ref_ticks(ref); - sys_register_listener(glb, &listener->base); socket_data->passive_listener = listener; + sys_register_listener(glb, &listener->base); } void socket_driver_do_recvfrom(Context *ctx, term pid, term ref, term length, term timeout) @@ -1119,8 +1119,8 @@ void socket_driver_do_accept(Context *ctx, term pid, term ref, term timeout) listener->length = 0; listener->buffer = 0; listener->ref_ticks = term_to_ref_ticks(ref); - sys_register_listener(glb, &listener->base); socket_data->passive_listener = listener; + sys_register_listener(glb, &listener->base); } static NativeHandlerResult socket_consume_mailbox(Context *ctx) From 16a45d294d3a613dd8295ae7a22454015f21c288 Mon Sep 17 00:00:00 2001 From: Peter M Date: Sat, 14 Mar 2026 18:34:44 +0100 Subject: [PATCH 3/4] Set accepted sockets nonblocking Configure newly accepted generic_unix TCP sockets as nonblocking before publishing them to the socket driver machinery. If fcntl fails, close the accepted fd and reply with an error so callers never observe a partially initialized connection. Signed-off-by: Peter M --- src/platforms/generic_unix/lib/socket_driver.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/platforms/generic_unix/lib/socket_driver.c b/src/platforms/generic_unix/lib/socket_driver.c index ffa0b7eb7d..b9daa38229 100644 --- a/src/platforms/generic_unix/lib/socket_driver.c +++ b/src/platforms/generic_unix/lib/socket_driver.c @@ -1060,6 +1060,21 @@ static EventListener *accept_callback(GlobalContext *glb, EventListener *base_li TRACE("socket_driver|accept_callback: accepted connection. fd: %i\n", fd); term pid = listener->pid; + if (UNLIKELY(fcntl(fd, F_SETFL, O_NONBLOCK) == -1)) { + int err = errno; + close(fd); + BEGIN_WITH_STACK_HEAP(12, heap); + term ref = term_from_ref_ticks(listener->ref_ticks, &heap); + term reply = port_heap_create_reply(&heap, ref, port_heap_create_sys_error_tuple(&heap, FCNTL_ATOM, err)); + port_send_message_nolock(glb, pid, reply); + END_WITH_STACK_HEAP(heap, glb); + globalcontext_get_process_unlock(glb, ctx); + if (socket_data->passive_listener) { + socket_data->passive_listener = NULL; + free(listener); + } + return NULL; + } SocketDriverData *new_socket_data = socket_driver_create_data(); new_socket_data->sockfd = fd; new_socket_data->proto = socket_data->proto; From a2f2ad386651a1262c57a593539edfdcc585f75b Mon Sep 17 00:00:00 2001 From: Peter M Date: Sat, 14 Mar 2026 20:36:00 +0100 Subject: [PATCH 4/4] Fix NULL dereference in accept_callback when process terminates If the owning process terminates between the accept() call and globalcontext_get_process_lock(), ctx will be NULL. The code immediately dereferences ctx->platform_data without checking, causing a segfault. Add a NULL check consistent with other callbacks (e.g. recv_callback), closing the accepted fd if needed and freeing the listener before returning. Amp-Thread-ID: https://ampcode.com/threads/T-019ced69-2d93-772b-b884-c90e4cbaefe5 Co-authored-by: Amp --- src/platforms/generic_unix/lib/socket_driver.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/platforms/generic_unix/lib/socket_driver.c b/src/platforms/generic_unix/lib/socket_driver.c index b9daa38229..f7f35ff2e3 100644 --- a/src/platforms/generic_unix/lib/socket_driver.c +++ b/src/platforms/generic_unix/lib/socket_driver.c @@ -1046,6 +1046,13 @@ static EventListener *accept_callback(GlobalContext *glb, EventListener *base_li socklen_t clientlen = sizeof(clientaddr); int fd = accept(listener->base.fd, (struct sockaddr *) &clientaddr, &clientlen); Context *ctx = globalcontext_get_process_lock(glb, listener->process_id); + if (UNLIKELY(ctx == NULL)) { + if (fd != -1) { + close(fd); + } + free(listener); + return NULL; + } SocketDriverData *socket_data = (SocketDriverData *) ctx->platform_data; EventListener *result = NULL; if (fd == -1) {