22
33// bad_host_repro_test.go — empirically validates which fibre provider
44// host formats survive end-to-end through the chain + fibre client +
5- // gRPC dialer. Three formats are exercised:
5+ // gRPC dialer against a docker stack built from celestia-app
6+ // `feat/fibre-payments` (i.e. a chain with the strict host:port
7+ // validation in x/valaddr).
68//
7- // - `http://host:port` : reproduces production error
8- // "too many colons in address"
9- // - `host:port` : reproduces production error
10- // "first path segment in URL cannot
11- // contain colon"
12- // - `dns:///host:port` : the only working form today
9+ // Three formats are exercised:
1310//
14- // Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only
15- // checks that the host is non-empty and ≤100 chars, so any of the
16- // above is accepted on chain. At read time the fibre client's
17- // `HostRegistry.GetHost` runs `url.Parse(host)`; bare host:port fails
18- // that, while `http://...` passes and then breaks downstream because
19- // `grpc.NewClient` doesn't recognise `http` as a resolver scheme and
20- // appends a default `:443`, yielding `http://host:port:443` ("too
21- // many colons"). Only `dns:///host:port` parses as a URL AND is a
22- // gRPC-known resolver scheme, so it works end-to-end.
11+ // - `host:port` : the canonical accepted form. Upload
12+ // succeeds end-to-end.
13+ // - `http://host:port` : rejected by `MsgSetFibreProviderInfo`
14+ // `ValidateBasic` — set-host tx fails.
15+ // - `dns:///host:port` : also rejected by `ValidateBasic` for
16+ // the same reason. Used to be the only
17+ // working form pre-fix, see
18+ // celestia-app PR #7183.
2319//
24- // The expected fix is to require a strict `host:port` form in
25- // `ValidateBasic` (no scheme, no path, no userinfo). After that lands
26- // the chain rejects the registration tx for both `http://...` and
27- // `dns:///...` and only `host:port` succeeds — assertions in this
28- // test will need to flip.
20+ // Pre-fix (celestia-app on `main` without #7183): the chain accepted
21+ // any non-empty host of up to 100 chars, so failures only surfaced at
22+ // upload time, as either "too many colons in address" (http:// case)
23+ // or "first path segment in URL cannot contain colon" (host:port case).
24+ // Those production-symptom assertions live in this test's git history
25+ // before the fix landed; once the chain enforces format, the failure
26+ // surfaces earlier (set-host tx rejection) which is what we assert
27+ // here.
2928//
3029// Run with:
3130//
@@ -52,31 +51,22 @@ import (
5251
5352// canonicalHosts records the per-validator hosts that register-fsps.sh
5453// installs at boot. The repro test re-registers each validator with a
55- // broken host , asserts Upload fails , then restores these canonical
56- // values so other tests remain runnable.
54+ // chosen format, asserts the resulting set-host or Upload outcome, then
55+ // restores these canonical values so other tests remain runnable.
5756var canonicalHosts = map [int ]string {
58- 0 : "dns:/// 127.0.0.1:7980" ,
59- 1 : "dns:/// 127.0.0.1:7981" ,
60- 2 : "dns:/// 127.0.0.1:7982" ,
61- 3 : "dns:/// 127.0.0.1:7983" ,
57+ 0 : "127.0.0.1:7980" ,
58+ 1 : "127.0.0.1:7981" ,
59+ 2 : "127.0.0.1:7982" ,
60+ 3 : "127.0.0.1:7983" ,
6261}
6362
64- // TestFibreClient_HostRegistrationFormats re-registers every validator
65- // with a particular host-string format, then attempts an Upload through
66- // a fresh adapter and asserts whether the upload succeeds or fails.
63+ // TestFibreClient_HostRegistrationFormats exercises three host formats
64+ // against the chain's MsgSetFibreProviderInfo + the fibre client's
65+ // HostRegistry + gRPC dialer:
6766//
68- // The matrix establishes empirically which formats the chain + fibre
69- // client accept end-to-end:
70- //
71- // - http_scheme_prefix → fails with "too many colons in address"
72- // - bare_host_port → fails with "first path segment in URL ..."
73- // - dns_prefix → succeeds (this is the only working form)
74- //
75- // The two failing cases exactly reproduce the production warnings the
76- // operator saw. The succeeding case is the positive control showing
77- // `dns:///host:port` is the working format today, which is what the
78- // proposed valaddr fix changes (it would make `host:port` succeed and
79- // `dns:///` fail).
67+ // - host_port → set-host succeeds, Upload succeeds (positive)
68+ // - http_prefix → set-host fails at chain ValidateBasic (negative)
69+ // - dns_prefix → set-host fails at chain ValidateBasic (negative)
8070//
8171// After each subtest the canonical registrations are restored so
8272// sibling tests on the shared docker stack continue to pass.
@@ -86,36 +76,34 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) {
8676 // hostFor returns the host string to register for the given
8777 // validator index (0..3).
8878 hostFor func (i int ) string
89- // wantUploadErr, when non-empty, marks this case as expected to
90- // fail Upload; the substring must appear in the resulting error
91- // chain (we look at the per-validator warning; the outer error
92- // is "not enough voting power" once enough fail).
93- wantUploadErr string
79+ // wantSetHostErr, when non-empty, marks this case as expected
80+ // to fail at `tx valaddr set-host` time. The substring must
81+ // appear in the CLI's stderr/stdout output (the chain's
82+ // ValidateBasic / MsgSetFibreProviderInfo response includes
83+ // "host must be in host:port form" or similar).
84+ wantSetHostErr string
9485 }{
9586 {
96- name : "http_scheme_prefix " ,
87+ name : "host_port " ,
9788 hostFor : func (i int ) string {
98- return fmt .Sprintf ("http:// 127.0.0.1:%d" , 7980 + i )
89+ return fmt .Sprintf ("127.0.0.1:%d" , 7980 + i )
9990 },
100- // Adapter uploads return the aggregate error; the
101- // per-validator dial error is logged, not bubbled. We
102- // assert the aggregate ("not enough voting power") here
103- // and rely on log capture below for the specific message.
104- wantUploadErr : "not enough voting power" ,
10591 },
10692 {
107- name : "bare_host_port " ,
93+ name : "http_prefix " ,
10894 hostFor : func (i int ) string {
109- return fmt .Sprintf ("127.0.0.1:%d" , 7980 + i )
95+ return fmt .Sprintf ("http:// 127.0.0.1:%d" , 7980 + i )
11096 },
111- wantUploadErr : "not enough voting power" ,
97+ // celestia-app's x/valaddr ValidateBasic returns this
98+ // error chain via the SDK CLI broadcast path.
99+ wantSetHostErr : "host must be in host:port form" ,
112100 },
113101 {
114102 name : "dns_prefix" ,
115103 hostFor : func (i int ) string {
116104 return fmt .Sprintf ("dns:///127.0.0.1:%d" , 7980 + i )
117105 },
118- // No wantUploadErr — Upload should succeed.
106+ wantSetHostErr : "host must be in host:port form" ,
119107 },
120108 }
121109
@@ -124,9 +112,6 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) {
124112 ctx , cancel := context .WithTimeout (context .Background (), 3 * time .Minute )
125113 t .Cleanup (cancel )
126114
127- jwt := readBridgeJWT (t )
128- kr := readClientKeyring (t )
129-
130115 t .Cleanup (func () {
131116 restoreCtx , restoreCancel := context .WithTimeout (context .Background (), 90 * time .Second )
132117 defer restoreCancel ()
@@ -135,30 +120,39 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) {
135120 t .Logf ("WARNING: failed to restore val%d host: %v" , i , err )
136121 }
137122 }
138- // One waitFor is enough — they're applied serially
139- // against val0's RPC and tx ordering guarantees the
140- // later ones land after the earlier one's check.
141123 if err := waitForHost (restoreCtx , t , canonicalHosts [3 ]); err != nil {
142124 t .Logf ("WARNING: canonical hosts did not propagate within timeout: %v" , err )
143125 }
144126 })
145127
146- // Register every validator with the chosen host format.
147- // The chain accepts all of these today — even the broken
148- // ones — because ValidateBasic only checks length.
128+ // For the negative cases we register only val0 — that's
129+ // enough to demonstrate the chain rejects the format. We
130+ // don't bother trying all four since the rejection comes
131+ // from ValidateBasic, which doesn't depend on which
132+ // validator submits the tx.
133+ if tc .wantSetHostErr != "" {
134+ err := setValHost (ctx , t , 0 , tc .hostFor (0 ))
135+ require .Error (t , err , "chain must reject %q at set-host tx" , tc .hostFor (0 ))
136+ require .Contains (t , err .Error (), tc .wantSetHostErr ,
137+ "set-host error should match expected ValidateBasic message" )
138+ t .Logf ("set-host rejected as expected (%s): %v" , tc .name , err )
139+ return
140+ }
141+
142+ // Positive case: register all four validators with the
143+ // canonical format, then run a real Upload to confirm the
144+ // gRPC dial path works end-to-end.
149145 for i := 0 ; i < 4 ; i ++ {
150146 h := tc .hostFor (i )
151147 require .NoError (t , setValHost (ctx , t , i , h ),
152- "chain should accept set-host for val%d host=%q on the current code " , i , h )
148+ "chain should accept set-host for val%d host=%q" , i , h )
153149 }
154- // Wait until val3's host is observable; this is the last
155- // one we wrote, so its presence implies the others also
156- // propagated.
157150 require .NoError (t , waitForHost (ctx , t , tc .hostFor (3 )),
158151 "%s registrations should be visible on chain" , tc .name )
159152
160- // Construct a FRESH adapter so PullAll picks up the just-
161- // updated registry rather than a cached canonical entry.
153+ jwt := readBridgeJWT (t )
154+ kr := readClientKeyring (t )
155+
162156 adapter , err := cnfiber .New (ctx , cnfiber.Config {
163157 Client : client.Config {
164158 ReadConfig : client.ReadConfig {
@@ -185,16 +179,9 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) {
185179 defer uploadCancel ()
186180
187181 res , uploadErr := adapter .Upload (uploadCtx , namespace , payload )
188- if tc .wantUploadErr != "" {
189- require .Error (t , uploadErr , "Upload must fail when no validator host can be dialed" )
190- require .Contains (t , uploadErr .Error (), tc .wantUploadErr ,
191- "upload error should match expected aggregate failure" )
192- t .Logf ("upload failed as expected (%s): %v" , tc .name , uploadErr )
193- } else {
194- require .NoError (t , uploadErr , "Upload should succeed for %s host format" , tc .name )
195- require .NotEmpty (t , res .BlobID )
196- t .Logf ("upload ok (%s): blob_id=%x" , tc .name , res .BlobID )
197- }
182+ require .NoError (t , uploadErr , "Upload should succeed for %s host format" , tc .name )
183+ require .NotEmpty (t , res .BlobID )
184+ t .Logf ("upload ok (%s): blob_id=%x" , tc .name , res .BlobID )
198185 })
199186 }
200187}
@@ -211,17 +198,26 @@ func setValHost(ctx context.Context, t *testing.T, valIdx int, host string) erro
211198 "--chain-id" , chainID ,
212199 "--node" , fmt .Sprintf ("tcp://%s:26657" , valName ),
213200 "--fees" , "5000utia" ,
201+ "--output" , "json" ,
214202 "--yes" ,
215203 )
216204 cmd .Dir = mustDockerDir (t )
217205 out , err := cmd .CombinedOutput ()
206+ // Deferred pause (applies on error paths too) so the next set-host on the same
207+ // validator account doesn't race the mempool's nonce check ("tx already exists").
208+ defer time .Sleep (4 * time .Second )
218209 if err != nil {
210+ // Non-zero CLI exit, e.g. a pre-broadcast ValidateBasic rejection:
211+ // the validation error is in the combined output and there is no
212+ // JSON payload. A broadcast that returns a non-zero chain code
213+ // in the JSON ack is handled by the `"code":0` check below.
219214 return fmt .Errorf ("set-host %q on %s: %w: %s" , host , valName , err , string (out ))
220215 }
221- // Brief pause so the next set-host targets a tx with an incremented
222- // account sequence (sequential txs from the same validator account
223- // can race the mempool's nonce check).
224- time .Sleep (2 * time .Second )
216+ // Successful CLI exit: confirm the JSON ack reports chain code 0 (substring match on the raw output, not a full JSON parse).
217+ if ! strings .Contains (string (out ), `"code":0` ) {
218+ return fmt .Errorf ("chain rejected set-host %q on %s (non-zero code): %s" ,
219+ host , valName , string (out ))
220+ }
225221 return nil
226222}
227223
0 commit comments