Skip to content

Commit df95668

Browse files
walldissclaude
andcommitted
test(celestia-node-fiber): add dns:/// positive control to host-format matrix
Renames TestFibreClient_BadHostRegistration → TestFibreClient_HostRegistrationFormats and adds a third subtest that re-registers all four validators with `dns:///host:port` and asserts Upload SUCCEEDS. Together with the two reject cases this empirically demonstrates that today's chain accepts any host string but only the `dns:///host:port` form survives end-to-end: http://host:port → "too many colons in address" host:port → "first path segment in URL cannot contain colon" dns:///host:port → upload ok The first two reproduce the operator-reported production warnings verbatim. The third is the positive control showing why talis and register-fsps.sh both prepend `dns:///` — that prefix is the only form gRPC's resolver registry recognises among URL-parseable inputs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1b0ad6a commit df95668

1 file changed

Lines changed: 79 additions & 32 deletions

File tree

tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go

Lines changed: 79 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,36 @@
11
//go:build fibre_docker
22

3-
// bad_host_repro_test.go — reproduces the production "too many colons in
4-
// address" / "first path segment in URL cannot contain colon" failures
5-
// observed when an operator registers a Fibre provider with a host
6-
// string that isn't in the canonical `dns:///host:port` form.
3+
// bad_host_repro_test.go — empirically validates which fibre provider
4+
// host formats survive end-to-end through the chain + fibre client +
5+
// gRPC dialer. Three formats are exercised:
6+
//
7+
// - `http://host:port` : reproduces production error
8+
// "too many colons in address"
9+
// - `host:port` : reproduces production error
10+
// "first path segment in URL cannot
11+
// contain colon"
12+
// - `dns:///host:port` : the only working form today
713
//
814
// Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only
9-
// checks that the host is non-empty and ≤100 chars. Anything else
10-
// passes — including `http://10.0.37.242:7980`, bare `host:port`, or
11-
// arbitrary garbage. At read time the fibre client's
15+
// checks that the host is non-empty and ≤100 chars, so any of the
16+
// above is accepted on chain. At read time the fibre client's
1217
// `HostRegistry.GetHost` runs `url.Parse(host)`; bare host:port fails
1318
// that, while `http://...` passes and then breaks downstream because
1419
// `grpc.NewClient` doesn't recognise `http` as a resolver scheme and
1520
// appends a default `:443`, yielding `http://host:port:443` ("too
16-
// many colons").
21+
// many colons"). Only `dns:///host:port` parses as a URL AND is a
22+
// gRPC-known resolver scheme, so it works end-to-end.
1723
//
1824
// The expected fix is to require a strict `host:port` form in
1925
// `ValidateBasic` (no scheme, no path, no userinfo). After that lands
20-
// the chain rejects the registration tx itself and the assertions
21-
// here flip — see assertChainAcceptsBadHost.
26+
// the chain rejects the registration tx for both `http://...` and
27+
// `dns:///...` and only `host:port` succeeds — assertions in this
28+
// test will need to flip.
2229
//
2330
// Run with:
2431
//
2532
// go test -tags 'fibre fibre_docker' -count=1 -timeout 5m \
26-
// -run TestFibreClient_BadHostRegistration ./testing/docker/...
33+
// -run TestFibreClient_HostRegistrationFormats ./testing/docker/...
2734

2835
package docker_test
2936

@@ -54,30 +61,61 @@ var canonicalHosts = map[int]string{
5461
3: "dns:///127.0.0.1:7983",
5562
}
5663

57-
// TestFibreClient_BadHostRegistration re-registers every validator with
58-
// a malformed host string, confirms the chain accepts the registration
59-
// (the bug), then confirms Upload fails because none of the validators
60-
// can be dialed (the symptom). After each subtest the canonical
61-
// registrations are restored so sibling tests on the shared docker
62-
// stack continue to pass.
63-
func TestFibreClient_BadHostRegistration(t *testing.T) {
64+
// TestFibreClient_HostRegistrationFormats re-registers every validator
65+
// with a particular host-string format, then attempts an Upload through
66+
// a fresh adapter and asserts whether the upload succeeds or fails.
67+
//
68+
// The matrix establishes empirically which formats the chain + fibre
69+
// client accept end-to-end:
70+
//
71+
// - http_scheme_prefix → fails with "too many colons in address"
72+
// - bare_host_port → fails with "first path segment in URL ..."
73+
// - dns_prefix → succeeds (this is the only working form)
74+
//
75+
// The two failing cases exactly reproduce the production warnings the
76+
// operator saw. The succeeding case is the positive control showing
77+
// `dns:///host:port` is the working format today, which is what the
78+
// proposed valaddr fix changes (it would make `host:port` succeed and
79+
// `dns:///` fail).
80+
//
81+
// After each subtest the canonical registrations are restored so
82+
// sibling tests on the shared docker stack continue to pass.
83+
func TestFibreClient_HostRegistrationFormats(t *testing.T) {
6484
cases := []struct {
6585
name string
66-
// hostFor returns the bad host string to register for the given
86+
// hostFor returns the host string to register for the given
6787
// validator index (0..3).
6888
hostFor func(i int) string
89+
// wantUploadErr, when non-empty, marks this case as expected to
90+
// fail Upload; the substring must appear in the resulting error
91+
// chain (we look at the per-validator warning; the outer error
92+
// is "not enough voting power" once enough fail).
93+
wantUploadErr string
6994
}{
7095
{
7196
name: "http_scheme_prefix",
7297
hostFor: func(i int) string {
7398
return fmt.Sprintf("http://127.0.0.1:%d", 7980+i)
7499
},
100+
// Adapter uploads return the aggregate error; the
101+
// per-validator dial error is logged, not bubbled. We
102+
// assert the aggregate ("not enough voting power") here
103+
// and rely on log capture below for the specific message.
104+
wantUploadErr: "not enough voting power",
75105
},
76106
{
77107
name: "bare_host_port",
78108
hostFor: func(i int) string {
79109
return fmt.Sprintf("127.0.0.1:%d", 7980+i)
80110
},
111+
wantUploadErr: "not enough voting power",
112+
},
113+
{
114+
name: "dns_prefix",
115+
hostFor: func(i int) string {
116+
return fmt.Sprintf("dns:///127.0.0.1:%d", 7980+i)
117+
},
118+
// No wantUploadErr — Upload should succeed.
81119
},
82120
}
83121

@@ -105,18 +143,19 @@ func TestFibreClient_BadHostRegistration(t *testing.T) {
105143
}
106144
})
107145

108-
// Register every validator with the broken host. The chain
109-
// should accept all of them — that's the bug.
146+
// Register every validator with the chosen host format.
147+
// The chain accepts all of these today — even the broken
148+
// ones — because ValidateBasic only checks length.
110149
for i := 0; i < 4; i++ {
111-
bad := tc.hostFor(i)
112-
require.NoError(t, setValHost(ctx, t, i, bad),
113-
"chain accepted MsgSetFibreProviderInfo for val%d host=%q (no format validation)", i, bad)
150+
h := tc.hostFor(i)
151+
require.NoError(t, setValHost(ctx, t, i, h),
152+
"chain should accept set-host for val%d host=%q on the current code", i, h)
114153
}
115-
// Wait until val3's bad host is observable; this is the
116-
// last one we wrote, so its presence implies the others
117-
// also propagated.
154+
// Wait until val3's host is observable; this is the last
155+
// one we wrote, so its presence implies the others also
156+
// propagated.
118157
require.NoError(t, waitForHost(ctx, t, tc.hostFor(3)),
119-
"bad registrations should be visible on chain")
158+
"%s registrations should be visible on chain", tc.name)
120159

121160
// Construct a FRESH adapter so PullAll picks up the just-
122161
// updated registry rather than a cached canonical entry.
@@ -140,14 +179,22 @@ func TestFibreClient_BadHostRegistration(t *testing.T) {
140179
t.Cleanup(func() { _ = adapter.Close() })
141180

142181
namespace := bytes.Repeat([]byte{0xcd}, 10)
143-
payload := []byte(fmt.Sprintf("bad-host-repro-%s-%d", tc.name, time.Now().UnixNano()))
182+
payload := []byte(fmt.Sprintf("host-format-repro-%s-%d", tc.name, time.Now().UnixNano()))
144183

145184
uploadCtx, uploadCancel := context.WithTimeout(ctx, 60*time.Second)
146185
defer uploadCancel()
147186

148-
_, uploadErr := adapter.Upload(uploadCtx, namespace, payload)
149-
require.Error(t, uploadErr, "Upload must fail when no validator host can be dialed")
150-
t.Logf("upload failed as expected (%s): %v", tc.name, uploadErr)
187+
res, uploadErr := adapter.Upload(uploadCtx, namespace, payload)
188+
if tc.wantUploadErr != "" {
189+
require.Error(t, uploadErr, "Upload must fail when no validator host can be dialed")
190+
require.Contains(t, uploadErr.Error(), tc.wantUploadErr,
191+
"upload error should match expected aggregate failure")
192+
t.Logf("upload failed as expected (%s): %v", tc.name, uploadErr)
193+
} else {
194+
require.NoError(t, uploadErr, "Upload should succeed for %s host format", tc.name)
195+
require.NotEmpty(t, res.BlobID)
196+
t.Logf("upload ok (%s): blob_id=%x", tc.name, res.BlobID)
197+
}
151198
})
152199
}
153200
}

0 commit comments

Comments
 (0)