Skip to content

Commit ce706d3

Browse files
jaredLunde and claude committed
fix e2e: add connect_timeout to postgres retry loops
Without connect_timeout, the Postgres client blocks waiting for the kernel TCP connect timeout (~30s) when iptables is dropping packets. The 10s retry deadline expired during the very first attempt, so retries never ran. Fix: set connect_timeout=3 so each attempt fails fast, and widen the deadline to 60s so the loop has room to cycle.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6ed7f12 commit ce706d3

1 file changed

Lines changed: 21 additions & 8 deletions

File tree

beyond-pg-sink/tests/e2e.rs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,9 +1748,13 @@ fn replica_recovers_via_archive() {
17481748
assert!(out.status.success());
17491749

17501750
let mut primary_client = {
1751-
let deadline = Instant::now() + Duration::from_secs(10);
1751+
// connect_timeout=3 so each attempt fails fast if iptables is still
1752+
// settling; without it the client can block for the kernel TCP timeout
1753+
// (~30s), which exceeds the retry deadline on a single failed attempt.
1754+
let url = format!("{pg_url} connect_timeout=3");
1755+
let deadline = Instant::now() + Duration::from_secs(60);
17521756
loop {
1753-
match postgres::Client::connect(&pg_url, postgres::NoTls) {
1757+
match postgres::Client::connect(&url, postgres::NoTls) {
17541758
Ok(c) => break c,
17551759
Err(_) if Instant::now() < deadline => {
17561760
std::thread::sleep(Duration::from_millis(100));
@@ -2454,9 +2458,13 @@ fn sink_crash_mid_write() {
24542458
assert!(out.status.success());
24552459

24562460
let mut primary_client = {
2457-
let deadline = Instant::now() + Duration::from_secs(10);
2461+
// connect_timeout=3 so each attempt fails fast if iptables is still
2462+
// settling; without it the client can block for the kernel TCP timeout
2463+
// (~30s), which exceeds the retry deadline on a single failed attempt.
2464+
let url = format!("{pg_url} connect_timeout=3");
2465+
let deadline = Instant::now() + Duration::from_secs(60);
24582466
loop {
2459-
match postgres::Client::connect(&pg_url, postgres::NoTls) {
2467+
match postgres::Client::connect(&url, postgres::NoTls) {
24602468
Ok(c) => break c,
24612469
Err(_) if Instant::now() < deadline => {
24622470
std::thread::sleep(Duration::from_millis(100));
@@ -2890,9 +2898,13 @@ fn wal_gap_stalls_replica() {
28902898
assert!(out.status.success());
28912899

28922900
let mut primary_client = {
2893-
let deadline = Instant::now() + Duration::from_secs(10);
2901+
// connect_timeout=3 so each attempt fails fast if iptables is still
2902+
// settling; without it the client can block for the kernel TCP timeout
2903+
// (~30s), which exceeds the retry deadline on a single failed attempt.
2904+
let url = format!("{pg_url} connect_timeout=3");
2905+
let deadline = Instant::now() + Duration::from_secs(60);
28942906
loop {
2895-
match postgres::Client::connect(&pg_url, postgres::NoTls) {
2907+
match postgres::Client::connect(&url, postgres::NoTls) {
28962908
Ok(c) => break c,
28972909
Err(_) if Instant::now() < deadline => {
28982910
std::thread::sleep(Duration::from_millis(100));
@@ -3353,9 +3365,10 @@ fn timeline_boundary_survives_failover() {
33533365
assert!(out.status.success());
33543366

33553367
let mut t1_client = {
3356-
let deadline = Instant::now() + Duration::from_secs(10);
3368+
let url = format!("{pg_url} connect_timeout=3");
3369+
let deadline = Instant::now() + Duration::from_secs(60);
33573370
loop {
3358-
match postgres::Client::connect(&pg_url, postgres::NoTls) {
3371+
match postgres::Client::connect(&url, postgres::NoTls) {
33593372
Ok(c) => break c,
33603373
Err(_) if Instant::now() < deadline => {
33613374
std::thread::sleep(Duration::from_millis(100));

0 commit comments

Comments
 (0)