11//go:build fibre_docker
22
3- // bad_host_repro_test.go — reproduces the production "too many colons in
4- // address" / "first path segment in URL cannot contain colon" failures
5- // observed when an operator registers a Fibre provider with a host
6- // string that isn't in the canonical `dns:///host:port` form.
3+ // bad_host_repro_test.go — empirically validates which fibre provider
4+ // host formats survive end-to-end through the chain + fibre client +
5+ // gRPC dialer. Three formats are exercised:
6+ //
7+ // - `http://host:port` : reproduces production error
8+ // "too many colons in address"
9+ // - `host:port` : reproduces production error
10+ // "first path segment in URL cannot
11+ // contain colon"
12+ // - `dns:///host:port` : the only working form today
713//
814// Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only
9- // checks that the host is non-empty and ≤100 chars. Anything else
10- // passes — including `http://10.0.37.242:7980`, bare `host:port`, or
11- // arbitrary garbage. At read time the fibre client's
15+ // checks that the host is non-empty and ≤100 chars, so any of the
16+ // above is accepted on chain. At read time the fibre client's
1217// `HostRegistry.GetHost` runs `url.Parse(host)`; bare host:port fails
1318// that, while `http://...` passes and then breaks downstream because
1419// `grpc.NewClient` doesn't recognise `http` as a resolver scheme and
1520// appends a default `:443`, yielding `http://host:port:443` ("too
16- // many colons").
21+ // many colons"). Only `dns:///host:port` parses as a URL AND is a
22+ // gRPC-known resolver scheme, so it works end-to-end.
1723//
1824// The expected fix is to require a strict `host:port` form in
1925// `ValidateBasic` (no scheme, no path, no userinfo). After that lands
20- // the chain rejects the registration tx itself and the assertions
21- // here flip — see assertChainAcceptsBadHost.
26+ // the chain rejects the registration tx for both `http://...` and
27+ // `dns:///...` and only `host:port` succeeds — assertions in this
28+ // test will need to flip.
2229//
2330// Run with:
2431//
2532// go test -tags 'fibre fibre_docker' -count=1 -timeout 5m \
26- // -run TestFibreClient_BadHostRegistration ./testing/docker/...
33+ // -run TestFibreClient_HostRegistrationFormats ./testing/docker/...
2734
2835package docker_test
2936
@@ -54,30 +61,61 @@ var canonicalHosts = map[int]string{
5461 3 : "dns:///127.0.0.1:7983" ,
5562}
5663
57- // TestFibreClient_BadHostRegistration re-registers every validator with
58- // a malformed host string, confirms the chain accepts the registration
59- // (the bug), then confirms Upload fails because none of the validators
60- // can be dialed (the symptom). After each subtest the canonical
61- // registrations are restored so sibling tests on the shared docker
62- // stack continue to pass.
63- func TestFibreClient_BadHostRegistration (t * testing.T ) {
64+ // TestFibreClient_HostRegistrationFormats re-registers every validator
65+ // with a particular host-string format, then attempts an Upload through
66+ // a fresh adapter and asserts whether the upload succeeds or fails.
67+ //
68+ // The matrix establishes empirically which formats the chain + fibre
69+ // client accept end-to-end:
70+ //
71+ // - http_scheme_prefix → fails with "too many colons in address"
72+ // - bare_host_port → fails with "first path segment in URL ..."
73+ // - dns_prefix → succeeds (this is the only working form)
74+ //
75+ // The two failing cases exactly reproduce the production warnings the
76+ // operator saw. The succeeding case is the positive control showing
77+ // `dns:///host:port` is the working format today, which is what the
78+ // proposed valaddr fix changes (it would make `host:port` succeed and
79+ // `dns:///` fail).
80+ //
81+ // After each subtest the canonical registrations are restored so
82+ // sibling tests on the shared docker stack continue to pass.
83+ func TestFibreClient_HostRegistrationFormats (t * testing.T ) {
6484 cases := []struct {
6585 name string
66- // hostFor returns the bad host string to register for the given
86+ // hostFor returns the host string to register for the given
6787 // validator index (0..3).
6888 hostFor func (i int ) string
89+ // wantUploadErr, when non-empty, marks this case as expected to
90+ // fail Upload; the substring must appear in the resulting error
91+ // chain (we look at the per-validator warning; the outer error
92+ // is "not enough voting power" once enough fail).
93+ wantUploadErr string
6994 }{
7095 {
7196 name : "http_scheme_prefix" ,
7297 hostFor : func (i int ) string {
7398 return fmt .Sprintf ("http://127.0.0.1:%d" , 7980 + i )
7499 },
100+ // Adapter uploads return the aggregate error; the
101+ // per-validator dial error is logged, not bubbled. We
102+ // assert the aggregate ("not enough voting power") here
103+ // and rely on log capture below for the specific message.
104+ wantUploadErr : "not enough voting power" ,
75105 },
76106 {
77107 name : "bare_host_port" ,
78108 hostFor : func (i int ) string {
79109 return fmt .Sprintf ("127.0.0.1:%d" , 7980 + i )
80110 },
111+ wantUploadErr : "not enough voting power" ,
112+ },
113+ {
114+ name : "dns_prefix" ,
115+ hostFor : func (i int ) string {
116+ return fmt .Sprintf ("dns:///127.0.0.1:%d" , 7980 + i )
117+ },
118+ // No wantUploadErr — Upload should succeed.
81119 },
82120 }
83121
@@ -105,18 +143,19 @@ func TestFibreClient_BadHostRegistration(t *testing.T) {
105143 }
106144 })
107145
108- // Register every validator with the broken host. The chain
109- // should accept all of them — that's the bug.
146+ // Register every validator with the chosen host format.
147+ // The chain accepts all of these today — even the broken
148+ // ones — because ValidateBasic only checks length.
110149 for i := 0 ; i < 4 ; i ++ {
111- bad := tc .hostFor (i )
112- require .NoError (t , setValHost (ctx , t , i , bad ),
113- "chain accepted MsgSetFibreProviderInfo for val%d host=%q (no format validation) " , i , bad )
150+ h := tc .hostFor (i )
151+ require .NoError (t , setValHost (ctx , t , i , h ),
152+ "chain should accept set-host for val%d host=%q on the current code " , i , h )
114153 }
115- // Wait until val3's bad host is observable; this is the
116- // last one we wrote, so its presence implies the others
117- // also propagated.
154+ // Wait until val3's host is observable; this is the last
155+ // one we wrote, so its presence implies the others also
156+ // propagated.
118157 require .NoError (t , waitForHost (ctx , t , tc .hostFor (3 )),
119- "bad registrations should be visible on chain" )
158+ "%s registrations should be visible on chain" , tc . name )
120159
121160 // Construct a FRESH adapter so PullAll picks up the just-
122161 // updated registry rather than a cached canonical entry.
@@ -140,14 +179,22 @@ func TestFibreClient_BadHostRegistration(t *testing.T) {
140179 t .Cleanup (func () { _ = adapter .Close () })
141180
142181 namespace := bytes .Repeat ([]byte {0xcd }, 10 )
143- payload := []byte (fmt .Sprintf ("bad- host-repro-%s-%d" , tc .name , time .Now ().UnixNano ()))
182+ payload := []byte (fmt .Sprintf ("host-format -repro-%s-%d" , tc .name , time .Now ().UnixNano ()))
144183
145184 uploadCtx , uploadCancel := context .WithTimeout (ctx , 60 * time .Second )
146185 defer uploadCancel ()
147186
148- _ , uploadErr := adapter .Upload (uploadCtx , namespace , payload )
149- require .Error (t , uploadErr , "Upload must fail when no validator host can be dialed" )
150- t .Logf ("upload failed as expected (%s): %v" , tc .name , uploadErr )
187+ res , uploadErr := adapter .Upload (uploadCtx , namespace , payload )
188+ if tc .wantUploadErr != "" {
189+ require .Error (t , uploadErr , "Upload must fail when no validator host can be dialed" )
190+ require .Contains (t , uploadErr .Error (), tc .wantUploadErr ,
191+ "upload error should match expected aggregate failure" )
192+ t .Logf ("upload failed as expected (%s): %v" , tc .name , uploadErr )
193+ } else {
194+ require .NoError (t , uploadErr , "Upload should succeed for %s host format" , tc .name )
195+ require .NotEmpty (t , res .BlobID )
196+ t .Logf ("upload ok (%s): blob_id=%x" , tc .name , res .BlobID )
197+ }
151198 })
152199 }
153200}
0 commit comments