diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..c91c3f3 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[net] +git-fetch-with-cli = true diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..f1ba8e7 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,31 @@ +{ + "permissions": { + "allow": [ + "Bash(cargo check *)", + "Bash(cargo test *)", + "Bash(cargo build *)", + "Bash(cargo run *)", + "Bash(echo \"exit=$?\")", + "Read(//home/ry/src/tree-sitter-htcl/**)", + "Read(//home/ry/src/tree-sitter-htcl/bindings/**)", + "Bash(tree-sitter generate *)", + "Bash(tree-sitter parse *)", + "Bash(tree-sitter test *)", + "Read(//home/ry/src/redhawk/host/vivado/metro/ip/**)", + "Read(//home/ry/src/amd-htcl/**)", + "Bash(cargo install *)", + "Read(//home/ry/src/htcl/amd/vivado-cmd/**)", + "Read(//home/ry/src/htcl/amd/vivado-cmd/cmd/**)", + "Bash(vw ip *)", + "Read(//tmp/**)", + "Bash(vw check *)", + "Bash(grep -nA20 \"fn diagnostics\\\\|validate\\(\\\\|fn publish_diagnostics\" vw-analyzer/src/htcl_backend.rs)", + "Bash(awk '{sum += $4} END {print \"passed:\", sum}')", + "Bash(vw --help)", + "Bash(awk '{ ok+=$4; failed+=$6 } END { print ok \" passed, \" failed \" failed\" }')", + "Bash(cargo clippy *)", + "Bash(awk *)", + "Bash(/home/ry/src/vw/target/debug/vw run *)" + ] + } +} diff --git a/.github/buildomat/jobs/build-linux.sh b/.github/buildomat/jobs/build-linux.sh index 4347a75..e5490dd 100755 --- a/.github/buildomat/jobs/build-linux.sh +++ b/.github/buildomat/jobs/build-linux.sh @@ -2,11 +2,14 @@ #: #: name = "build-linux" #: variety = "basic" -#: target = "ubuntu-22.04" +#: target = "ubuntu-24.04" #: rust_toolchain = "stable" #: output_rules = [ #: "/work/release/*", #: ] +#: access_repos = [ +#: "oxidecomputer/ipe" +#: ] #: #: [[publish]] #: series = "linux" diff --git a/.github/buildomat/jobs/build.sh b/.github/buildomat/jobs/build.sh index 74e6862..f58e0d1 100755 --- 
a/.github/buildomat/jobs/build.sh +++ b/.github/buildomat/jobs/build.sh @@ -2,11 +2,14 @@ #: #: name = "build" #: variety = "basic" -#: target = "helios-2.0" +#: target = "helios-3.0" #: rust_toolchain = "stable" #: output_rules = [ #: "/work/release/*", #: ] +#: access_repos = [ +#: "oxidecomputer/ipe" +#: ] #: #: [[publish]] #: series = "illumos" diff --git a/.gitignore b/.gitignore index ea8c4bf..de95887 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /target +*.jou +*.log +.srcs diff --git a/CLAUDE.md b/CLAUDE.md index c0bf84b..9016f84 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -92,3 +92,24 @@ project/ ├── vw.toml └── vhdl_ls.toml ``` + +## htcl language stack + +`vw-htcl` (winnow parser, AST, analysis) is the source of truth for +htcl semantics. It feeds `vw run` (interpreter front-end driving an +EDA backend), `vw analyzer` (LSP), and eventually `vw repl`. + +There is a sibling tree-sitter grammar at +`~/src/tree-sitter-htcl` (repo: +https://github.com/oxidecomputer/tree-sitter-htcl) used by editors +for syntax highlighting. **Both must stay in sync.** When changing +the htcl grammar in `vw-htcl/src/parser.rs` or `ast.rs`: + +- Update `~/src/tree-sitter-htcl/grammar.js` to match. +- Update or add corpus tests in `~/src/tree-sitter-htcl/test/corpus/`. +- Update `~/src/tree-sitter-htcl/queries/highlights.scm` if new node + types deserve distinct highlighting. +- Run `tree-sitter generate && tree-sitter test` in that repo. + +The sync contract (vw-htcl leads, divergences are documented) is +written up in `~/src/tree-sitter-htcl/README.md`. 
diff --git a/Cargo.lock b/Cargo.lock index 16a7be5..2264527 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -82,12 +88,60 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arboard" +version = "3.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0348a1c054491f4bfe6ab86a7b6ab1e44e45d899005de92f58b3df180b36ddaf" +dependencies = [ + "clipboard-win", + "image 0.25.10", + "log", + "objc2", + "objc2-app-kit", + "objc2-core-foundation", + "objc2-core-graphics", + "objc2-foundation", + "parking_lot", + "percent-encoding", + "windows-sys 0.59.0", + "x11rb", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "auto_impl" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ 
-118,6 +172,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.11.1" @@ -130,6 +190,21 @@ version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +[[package]] +name = "cassowary" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.57" @@ -148,6 +223,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chrono" version = "0.4.44" @@ -201,6 +282,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" +[[package]] +name = "clipboard-win" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] + [[package]] name = "color_quant" version = "1.1.0" @@ 
-223,6 +313,20 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "compact_str" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fd622ebbb56a5b2ccb651b32b911cdeb2a9b4b11776b2473bf26a26a286244e" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -309,6 +413,85 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags 2.11.0", + "crossterm_winapi", + "futures-core", + "mio", + "parking_lot", + "rustix 0.38.44", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" 
+version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "dirs" version = "5.0.1" @@ -351,6 +534,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags 2.11.0", + "objc2", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -371,6 +564,12 @@ dependencies = [ "libloading", ] +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + [[package]] name = "dunce" version = "1.0.5" @@ -431,12 +630,24 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fax" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf1079563223d5d59d83c85886a56e586cfd5c1a26292e971a0fa266531ac5a" + [[package]] name = "fdeflate" version = "0.3.7" @@ -446,6 +657,17 @@ dependencies = [ "simd-adler32", ] 
+[[package]] +name = "filedescriptor" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e40758ed24c9b2eeb76c35fb0aebc66c626084edd827e07e1552279814c6682d" +dependencies = [ + "libc", + "thiserror 1.0.69", + "winapi", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -558,6 +780,104 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "gethostname" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bd49230192a3797a9a4d6abe9b3eed6f7fa4c8a8a4947977c6f80025f92cbd8" +dependencies = [ + "rustix 1.1.4", + "windows-link", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -625,12 +945,31 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "hashbrown" version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -646,6 +985,12 @@ version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -757,6 +1102,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -789,7 +1140,21 @@ dependencies = [ "color_quant", "jpeg-decoder", "num-traits", - "png", + "png 0.17.16", +] + +[[package]] +name = "image" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" +dependencies = [ + "bytemuck", + "byteorder-lite", + "moxcms", + "num-traits", + "png 0.18.1", + "tiff", ] [[package]] @@ -804,12 +1169,53 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "instability" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb2d60ef19920a3a9193c3e371f726ec1dafc045dac788d0fb3704272458971" +dependencies = [ + "darling", + "indoc", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ipxact" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/ipe?branch=ry%2Finit#0360158d06554a7019aba9d17c9d654119f48d5c" +dependencies = [ + 
"quick-xml", + "serde", + "thiserror 2.0.18", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -928,6 +1334,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -956,10 +1368,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] -name = "memchr" -version = "2.8.0" +name = "lru" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lsp-types" +version = "0.94.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "miniz_oxide" @@ -978,16 +1421,48 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", + "log", "wasi", "windows-sys 0.61.2", ] +[[package]] +name = "moxcms" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" +dependencies = [ + "num-traits", + "pxfm", +] + [[package]] name = "netrc" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9a91b326434fca226707ed8ec1fd22d4e1c96801abdf10c412afdc7d97116e0" +[[package]] +name = "nix" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +dependencies = [ + "bitflags 2.11.0", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -997,6 +1472,79 @@ dependencies = [ "autocfg", ] +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-app-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d49e936b501e5c5bf01fda3a9452ff86dc3ea98ad5f283e1455153142d97518c" +dependencies = [ + "bitflags 2.11.0", + "objc2", + "objc2-core-graphics", + "objc2-foundation", +] + 
+[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.11.0", + "dispatch2", + "objc2", +] + +[[package]] +name = "objc2-core-graphics" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807" +dependencies = [ + "bitflags 2.11.0", + "dispatch2", + "objc2", + "objc2-core-foundation", + "objc2-io-surface", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags 2.11.0", + "objc2", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-io-surface" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d" +dependencies = [ + "bitflags 2.11.0", + "objc2", + "objc2-core-foundation", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -1039,7 +1587,7 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3" dependencies = [ - "unicode-width", + "unicode-width 0.1.14", ] [[package]] @@ -1065,6 +1613,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pathfinder_geometry" version = 
"0.5.1" @@ -1102,6 +1656,26 @@ dependencies = [ "serde", ] +[[package]] +name = "pin-project" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -1128,7 +1702,7 @@ checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "chrono", "font-kit", - "image", + "image 0.24.9", "lazy_static", "num-traits", "pathfinder_geometry", @@ -1153,7 +1727,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ce181e3f6bf82d6c1dc569103ca7b1bd964c60ba03d7e6cdfbb3e3eb7f7405" dependencies = [ "gif", - "image", + "image 0.24.9", "plotters-backend", ] @@ -1179,6 +1753,40 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "png" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" +dependencies = [ + "bitflags 2.11.0", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "portable-pty" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4a596a2b3d2752d94f51fac2d4a96737b8705dddd311a32b9af47211f08671e" +dependencies = [ + "anyhow", + "bitflags 1.3.2", + "downcast-rs", + "filedescriptor", + "lazy_static", + "libc", + "log", + "nix", + "serial2", + "shared_library", + "shell-words", + "winapi", + "winreg", +] + [[package]] name = "potential_utf" version = "0.1.4" @@ -1207,6 +1815,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pxfm" 
+version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.45" @@ -1228,6 +1858,27 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "ratatui" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +dependencies = [ + "bitflags 2.11.0", + "cassowary", + "compact_str", + "crossterm", + "indoc", + "instability", + "itertools 0.13.0", + "lru", + "paste", + "strum 0.26.3", + "unicode-segmentation", + "unicode-truncate", + "unicode-width 0.2.0", +] + [[package]] name = "rayon" version = "1.11.0" @@ -1317,6 +1968,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.4" @@ -1326,7 +1990,7 @@ dependencies = [ "bitflags 2.11.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.12.1", "windows-sys 0.61.2", ] @@ -1336,6 +2000,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "same-file" version = "1.0.6" @@ -1400,6 +2070,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -1409,12 +2090,69 @@ dependencies = [ "serde", ] +[[package]] +name = "serial2" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eb6ea5562eeaed6936b8b54e086aa0f88b9e5b1bef45beb038e2519fa1185b1" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shared_library" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9e7e0f2bfae24d8a5b5a66c5b257a83c7412304311512a0c054cd5e619da11" +dependencies = [ + "lazy_static", + "libc", +] + +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook" +version = "0.3.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + [[package]] name = "signal-hook-registry" version = "1.4.8" @@ -1431,6 +2169,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" version = "1.15.1" @@ -1453,19 +2197,47 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros 0.26.4", +] + [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros", + 
"strum_macros 0.27.2", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", ] [[package]] @@ -1487,7 +2259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a9a86e5144f63c2d18334698269a8bfae6eece345c70b64821ea5b35054ec99" dependencies = [ "memchr", - "unicode-width", + "unicode-width 0.1.14", ] [[package]] @@ -1521,7 +2293,7 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix", + "rustix 1.1.4", "windows-sys 0.61.2", ] @@ -1565,6 +2337,29 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tiff" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52" +dependencies = [ + "fax", + "flate2", + "half", + "quick-error", + "weezl", + "zune-jpeg", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -1603,6 +2398,19 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.23" @@ -1635,7 +2443,7 @@ dependencies = [ "serde_spanned", "toml_datetime", "toml_write", - "winnow", + "winnow 0.7.15", ] [[package]] @@ -1644,24 +2452,179 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-lsp" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ba052b54a6627628d9b3c34c176e7eda8359b7da9acd497b9f20998d118508" +dependencies = [ + "async-trait", + "auto_impl", + "bytes", + "dashmap", + "futures", + "httparse", + "lsp-types", + "memchr", + "serde", + "serde_json", + "tokio", + "tokio-util", + "tower", + "tower-lsp-macros", + "tracing", +] + +[[package]] +name = "tower-lsp-macros" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = 
[ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "ttf-parser" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" +[[package]] +name = "tui-textarea" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a5318dd619ed73c52a9417ad19046724effc1287fb75cdcc4eca1d6ac1acbae" +dependencies = [ + "crossterm", + "ratatui", + "unicode-width 0.2.0", +] + [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-segmentation" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" + +[[package]] +name = "unicode-truncate" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" +dependencies = [ + "itertools 0.13.0", + "unicode-segmentation", + "unicode-width 0.1.14", +] + [[package]] name = "unicode-width" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -1678,6 +2641,7 @@ dependencies = [ "idna", "percent-encoding", "serde", + "serde_derive", ] [[package]] @@ -1692,6 +2656,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1710,12 +2680,12 @@ dependencies = [ "enum-map", "fnv", "glob", - "itertools", + "itertools 0.14.0", "pad", "parking_lot", "pinned_vec", "rayon", - "strum", + "strum 0.27.2", "subst", "toml", "vhdl_lang_macros", @@ -1739,7 +2709,80 @@ dependencies = [ "clap", "colored", "tokio", + "tracing-subscriber", + "vw-analyzer", + "vw-eda", + "vw-htcl", + "vw-htcl-cmd", + "vw-ip", "vw-lib", + "vw-repl", + "vw-vivado", +] + +[[package]] +name = "vw-analyzer" +version = "0.1.0" +dependencies = [ + "async-trait", + "camino", + "serde", + "serde_json", + "tempfile", + "tokio", + "tower-lsp", + "tracing", + "tracing-subscriber", + "vw-htcl", + "vw-lib", +] + +[[package]] +name = "vw-eda" +version = "0.1.0" +dependencies = [ + "async-trait", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio", +] + +[[package]] +name = "vw-htcl" +version = 
"0.1.0" +dependencies = [ + "camino", + "serde", + "tempfile", + "thiserror 1.0.69", + "vw-quote", + "winnow 0.6.26", +] + +[[package]] +name = "vw-htcl-cmd" +version = "0.1.0" +dependencies = [ + "serde", + "tempfile", + "thiserror 1.0.69", + "toml", + "vw-htcl", + "winnow 0.6.26", +] + +[[package]] +name = "vw-ip" +version = "0.1.0" +dependencies = [ + "ipxact", + "quick-xml", + "serde", + "tempfile", + "thiserror 1.0.69", + "vw-htcl", + "vw-quote", ] [[package]] @@ -1768,6 +2811,54 @@ dependencies = [ "vhdl_lang", ] +[[package]] +name = "vw-quote" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "vw-htcl", +] + +[[package]] +name = "vw-repl" +version = "0.1.0" +dependencies = [ + "arboard", + "base64", + "camino", + "crossterm", + "dirs 5.0.1", + "futures", + "ratatui", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "tui-textarea", + "vw-eda", + "vw-htcl", + "vw-lib", + "vw-vivado", + "winnow 0.6.26", +] + +[[package]] +name = "vw-vivado" +version = "0.1.0" +dependencies = [ + "async-trait", + "portable-pty", + "serde", + "serde_json", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "vw-eda", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -2135,6 +3226,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.6.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e90edd2ac1aa278a5c4599b1d89cf03074b610800f866d4026dc199d7929a28" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.7.15" @@ -2144,6 +3244,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] + [[package]] name = "wio" version = 
"0.2.2" @@ -2247,6 +3356,23 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "x11rb" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9993aa5be5a26815fe2c3eacfc1fde061fc1a1f094bf1ad2a18bf9c495dd7414" +dependencies = [ + "gethostname", + "rustix 1.1.4", + "x11rb-protocol", +] + +[[package]] +name = "x11rb-protocol" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6fc2961e4ef194dcbfe56bb845534d0dc8098940c7e5c012a258bfec6701bd" + [[package]] name = "yeslogic-fontconfig-sys" version = "6.0.0" @@ -2281,6 +3407,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.6" @@ -2340,3 +3486,18 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zune-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" + +[[package]] +name = "zune-jpeg" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296" +dependencies = [ + "zune-core", +] diff --git 
a/Cargo.toml b/Cargo.toml index fcfd4f0..a04976e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,23 @@ [workspace] resolver = "2" -members = ["vw-lib", "vw-cli"] +members = [ + "vw-lib", + "vw-cli", + "vw-htcl", + "vw-eda", + "vw-vivado", + "vw-analyzer", + "vw-quote", + "vw-ip", + "vw-htcl-cmd", + "vw-repl", +] [workspace.package] version = "0.1.0" edition = "2021" license = "MPL-2.0" -repository = "https://github.com/your-username/vw" +repository = "https://github.com/oxidecomputer/vw" [workspace.dependencies] # Shared dependencies @@ -24,3 +35,15 @@ url = "2.5" glob = "0.3" petgraph = "0.8.3" plotters = "0.3" +winnow = "0.6" +async-trait = "0.1" +tower-lsp = "0.20" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +futures = "0.3" +portable-pty = "0.9" +ratatui = { version = "0.29", features = ["crossterm"] } +crossterm = { version = "0.28", features = ["event-stream"] } +tui-textarea = { version = "0.7", default-features = false, features = ["crossterm", "ratatui"] } +#ipxact = { path = "/home/ry/src/ipe/crates/ipxact" } +ipxact = { git = "https://github.com/oxidecomputer/ipe", branch = "ry/init" } diff --git a/docs/authoring-htcl-libraries.md b/docs/authoring-htcl-libraries.md new file mode 100644 index 0000000..3514484 --- /dev/null +++ b/docs/authoring-htcl-libraries.md @@ -0,0 +1,704 @@ +# Authoring htcl libraries + +This document is a reference for engineers writing **htcl modules +that wrap underlying EDA IP, commands, or workflows**. The intended +output of such a module is one or more `proc` declarations that +downstream code can call from a workflow script to configure +hardware, drive a build, or otherwise script an EDA tool. + +The document covers: + +1. What htcl is and what it is for. +2. The full surface of the language — syntax, semantics, attributes. +3. Where htcl behaves the same as Tcl and where it differs. +4. How to validate an htcl program with `vw`. + +## 1. 
What htcl is + +htcl — "**h**ardware Tcl" — is a small structured dialect of Tcl +for HDL workflow scripting. It is the language `vw` uses to drive +EDA backends (today, Vivado over a pipe) and to give engineers a +source-controlled, reviewable, tool-checkable surface for everything +that would otherwise live as ad-hoc Tcl: IP configuration, block +design construction, project setup, simulation harnessing, and +custom commands that wrap a vendor tool. + +htcl is **not** specific to any single source of those wrappers. An +htcl library may be: + +- **Hand-written** by an engineer, wrapping a Vivado/Quartus + built-in or an in-house Tcl helper with a typed, doc-commented + interface. +- **Generated** from a vendor IP-XACT description (e.g. `vw ip + generate` emits an htcl wrapper for a Xilinx IP). The result is + ordinary htcl — there is nothing IP-XACT-shaped about it once + generated. +- **Generated from anything else** that has no IP-XACT — a custom + IP repo, a curated set of Tcl recipes, a board-bring-up script. + +What unifies them is the structured `proc` surface: every wrapper +is a proc with documented keyword arguments, default values, and +constraints the analyzer can check. + +At analysis time, the syntax tree drives the LSP (`vw analyzer`) — +completion, hover, signature help, error reporting. At run time, +htcl is lowered to plain Tcl and shipped to the backend. + +### Module shape + +An htcl library is one or more `.htcl` files. Most libraries place +their entry point at a conventional path (`src/.htcl`) and +may `src`-import additional files. A `proc` declared in any +imported file becomes callable in the consumer's scope. Proc names +are flat unless wrapped in a `namespace eval` block, which groups +related helpers under a `::` prefix — see §2.10 below. + +## 2. The language + +### 2.1 File overview + +An htcl file is a sequence of statements separated by newlines or +semicolons. 
Each statement is one of: + +| Statement | Purpose | +|---|---| +| Comment `# ...` | Free-form comment; ignored. | +| Doc comment `## ...` | Attached to the next `proc` or proc-arg; surfaces in hover. | +| Command `name word word ...` | A call to any command (Tcl builtin, EDA builtin, or htcl proc). | +| `set <name> <value>` | Variable assignment (same as Tcl). | +| `proc <name> { args } { body }` | Structured proc declaration (the main authoring construct). | +| `src <path>` | Import another htcl module (htcl-specific; no Tcl analogue). | + +Whitespace is significant only as a word separator. Indentation is +free-form. + +### 2.2 Word forms + +Every command word can be written in one of three forms — the same +three Tcl supports: + +| Form | Example | Semantics | +|---|---|---| +| Bare | `foo` | A literal word. May contain `$var` and `[cmd]`. | +| Quoted | `"hello $world"` | Variable and command substitution still happen; whitespace is preserved. | +| Braced | `{a b c}` | Literal text; no substitution. | + +Inside `[ … ]` command substitution, newlines are treated as +whitespace and do not need backslash continuations. **This is the +canonical form for call sites that don't fit on one line** — wrap +the call in brackets, bind the result with `set`, and let each +keyword argument live on its own line without backslash noise: + +```htcl +set cpm5_pcie1 [ + create_cpm5_cpm_pcie1 + -cell cpm5 + -max_link_speed 32.0_GT/s + -modes PCIE +] +``` + +A bare word ending in `\` does continue onto the next line (the +classic Tcl form), but bracket-bound `set` is the preferred style. + +### 2.3 Comments and doc comments + +htcl has two distinct comment forms with different purposes: + +- **Regular comments — `# ...`.** Free-form. Use these to record + rationale at call sites, to label sections of a workflow script, + to leave notes — anything that's "for the reader." The analyzer + ignores them.
+- **Doc comments — `## ...`.** Semantically significant + documentation that attaches to the next `proc` declaration or + the next proc-arg. Tooling — the LSP for hover and signature + help, and documentation generators — consumes them. Use them + only inside a library, on definitions; they serve no purpose at + call sites. + +```htcl +## Configure an AXIS register slice. ;# doc-comment on the proc +proc create_axis_register_slice { + ## Block-design cell name. ;# doc-comment on this arg + cell_name + ... +} + +# Internal streaming bus between DMA and classifier. ;# call-site +# 128-bit because the classifier hits 100 Gb/s. ;# rationale — +set dma_to_classifier [ ;# use #, not ## + create_axis_register_slice + -cell_name dma_to_classifier + -tdata_width 128 +] +``` + +Multiple `##` lines stack into one block of doc-comment text on +the item they precede. + +### 2.4 The `proc` declaration + +This is the central authoring construct. The args list inside the +first `{ … }` is *structured*: each arg is a single identifier +preceded by optional doc comments and optional `@attribute(...)` +annotations. The args grammar is: + +``` +args := arg_item* +arg_item := doc_comment* attribute* IDENT +attribute := '@' IDENT ( '(' value ( ',' value )* ')' )? +value := integer | string | ident +``` + +Example: + +```htcl +## Configure a Versal CIPS instance. +## +## Sets the requested CONFIG.* properties on the supplied block-design cell. +proc create_versal_cips { + ## Block-design cell handle to set the property on. + cell + + ## Boot the secondary PCIe controller as well as the primary. + @enum(0, 1) @default(0) boot_secondary_pcie_enable + + ## Inner dict for the PMC subsystem. 
+ @default("") ps_pmc_config +} { + set_property -dict [list \ + CONFIG.BOOT_SECONDARY_PCIE_ENABLE $boot_secondary_pcie_enable \ + CONFIG.PS_PMC_CONFIG $ps_pmc_config \ + ] $cell +} +``` + +Notes: + +- **htcl is keyword-only.** Every proc you declare accepts its + arguments as `-flag value` pairs at the call site. There is no + positional call syntax — even an arg with no `@default` (an + implicitly-required arg) must be passed as `-arg value`. The + args list above is the **declaration order**, used by the + validator for documentation and stable diagnostics, not by Tcl + for dispatch. +- Args with no `@default` are **required**. Omitting them at a + call site is a compile-time error from the validator. +- The proc body is plain Tcl text. The body refers to each arg by + its bare name (`$cell`, `$boot_secondary_pcie_enable`, …) — the + same way it would for a standard Tcl proc with named parameters. + The lowerer wires up these locals from the caller's `-flag + value` pairs at runtime via a generated `::vw::kwargs` prelude. +- Because the keyword parse happens at runtime (inside the + wrapper), call sites work uniformly at **any** nesting: at the + top level of a file, inside another proc's body, inside a + `namespace eval`, inside a `[ ... ]` command substitution, or + through an `eval`/`uplevel`. The lowerer doesn't need to see + the call site to translate it. +- `proc` itself may be declared at any depth, but only **top-level** + proc declarations are visible to the call-site validator. Procs + defined inside another proc's body ship as raw text and miss + the kwargs-prelude treatment — avoid nested proc declarations + in htcl, or write them in raw Tcl form (`proc inner args { ... + }`). + +### 2.5 Argument attributes + +Attributes go on the line(s) before the arg's identifier. They are +parsed positionally and may stack. Values are written with strings +quoted, integers bare, identifiers bare. 
+ +| Attribute | Meaning | Example | +|---|---|---| +| `@default(<value>)` | Default value used when caller omits this arg. Presence makes the arg optional. | `@default(0) boot_secondary_pcie_enable` | +| `@enum(<v1>, <v2>, ...)` | Caller's value must match one of the listed literals. Validated when the value is a literal. | `@enum(0, 1) enable` | +| `@range(<lo>, <hi>)` | Caller's integer value must satisfy `lo <= n <= hi`. | `@range(1, 16) num_lanes` | +| `@requires(<other>)` | This arg, if set, requires `-<other>` to also be set. | `@requires(has_tuser) tuser_width` | +| `@conflicts(<other>)` | This arg cannot coexist with `-<other>`. | `@conflicts(slave_mode) master_mode` | +| `@deprecated[(msg)]` | Warning at call sites; optional human message. | `@deprecated("use -mode instead") legacy_mode` | + +`@enum` and `@range` only check **literal** call-site values. A +value that is itself an interpolation (`$var`, `[cmd]`) is not +statically checkable and silently passes — the runtime sees +whatever the interpolation produces. + +### 2.5.0a Argument types + +An argument may carry a type annotation in a `: TYPE` suffix on +the arg name: + +``` +proc plumb_pin { + ## What to name the external port. + name: string + + ## Identity of the pin to make external. + pin: bd_pin +} unit { + … +} +``` + +The annotation uses the same type vocabulary as the return-type +slot (§2.5.1) — primitives (`string`, `int`, `bool`, `unit`), +newtypes (`bd_cell`, `bd_pin`, …), and generics (`list`, +`dict`, with arbitrary nesting). Type annotations combine with the +existing attribute grammar (`@default(0) count: int`). + +Annotations are optional — untyped args still parse. The +analyzer shows annotated args as `-name: TYPE` in hover and +signature help; the validator uses them to shape-check newtype +`::repr` / `from` / `to` triplets. + +See [htcl-return-types.md](htcl-return-types.md) for the full +type vocabulary, newtype declaration syntax, and worked +examples. For values that can take one of several shapes +(e.g.
heterogeneous EDA return values), see +[htcl-enums.md](htcl-enums.md) for tagged sum types with +auto-generated constructors, repr, and overload dispatch. + +### 2.5.1 Return types + +A proc may carry a return-type annotation in a 4th-word slot +between the args block and the body: + +``` +proc make_widget { @arg(name) ... } widget { + …body… +} +``` + +The annotation drives the REPL printer (the result is formatted +through the type's `repr` proc) and the analyzer's hover / +signature-help (`proc NAME → TYPE`). Procs without an annotation +parse and behave identically to today — adoption is gradual. + +Available shapes: + +- Primitives: `string`, `int`, `bool`, `unit`. Built into the + compiler; no declaration needed. +- Generics: `list`, `dict`, with arbitrary nesting. +- User newtypes: any identifier introduced via `type NAME = + UNDERLYING`, accompanied by `::repr` / `from` / `to`. + +`unit` is the type for side-effecting procs that don't return a +meaningful value (logging, configuring, connecting). The REPL +suppresses the empty Result entry on `unit`-typed expressions. + +See [htcl-return-types.md](htcl-return-types.md) for the full +type vocabulary, newtype declaration syntax, and worked examples. + +### 2.6 Call sites + +The canonical call-site shape is `set [ ]` — +bind the call's return value to a name, and let the brackets handle +multi-line wrapping. Each keyword argument goes on its own line, no +backslash continuations needed: + +```htcl +set cips [ + create_versal_cips + -name cips + -cpm_config cpm5 +] + +# 250 MHz / 195 MHz aren't in the preset list but the clock +# generator will synthesize them — chosen for the eth core. 
+set ps_pmc_config [ + create_versal_cips_ps_pmc_config + -cell cips + -clock_mode Custom + -design_mode 1 + -pcie_apertures_dual_enable 0 + -pcie_apertures_single_enable 0 + -pmc_crp_pl0_ref_ctrl_freqmhz 250 + -pmc_crp_pl1_ref_ctrl_freqmhz 195 + -ps_board_interface Custom + -ps_pcie1_peripheral_enable 0 + -ps_pcie2_peripheral_enable 1 + -ps_pcie_reset {ENABLE 1} + -ps_use_pmcpl_clk0 1 + -ps_use_pmcpl_clk1 1 + -ps_use_pmcpl_iro_clk 0 + -smon_alarms Set_Alarms_On + -smon_enable_temp_averaging 0 + -smon_temp_averaging_samples 0 +] +``` + +A one-line call works the same way — just don't break across lines: + +```htcl +set cpm5 [create_cpm5 -name cpm5] +``` + +Rules the validator enforces on every call to a known proc: + +- Each `-flag` must be one of the declared args. +- Each `-flag` is given exactly one value (the next word). +- Each `-flag` appears at most once (a duplicate is a warning). +- Every required arg (no `@default`) must be present. +- `@requires` / `@conflicts` relationships are checked across + present args. +- Literal values are checked against `@enum` / `@range`. +- `@deprecated` flags produce warnings. + +Calls to commands that aren't declared `proc`s in the loaded +documents are **not** validated — they're assumed to be EDA or +Tcl builtins and are passed through verbatim. + +### 2.7 Variables and substitution + +Variables work as in Tcl: + +```htcl +set ref_clk 100 +puts "ref clock is $ref_clk MHz" + +# Braces suppress substitution. +puts {$ref_clk is literal here} +``` + +`$name` references resolve against the nearest enclosing scope — +local `set`s first, then the enclosing proc's parameter list. +There is no static type checking on variable values. + +### 2.8 Imports — `src` + +```htcl +src @amd-htcl/cpm5 ;# named workspace dependency +src @amd-htcl/cips +src "lib/utils.htcl" ;# relative to the importing file +src "/abs/path/to/file.htcl" ;# absolute filesystem path +``` + +The `src` statement loads and inlines another htcl module. 
Path +forms: + +- `@<dep>/<module>` — resolved via `vw.toml`'s workspace + dependencies (the same dependency resolver `vw` uses for VHDL + deps). +- Anything starting with `/` — filesystem-absolute. +- Anything else — relative to the directory of the importing file. + +The path word must be a literal (bare or quoted text with no +`$var` or `[cmd]` parts). The loader is idempotent on canonical +paths: a file imported twice loads once. + +By the time an htcl program reaches the backend, all `src` imports +have been flattened into a single Tcl stream. + +### 2.9 A complete library example + +Hand-written wrapper around an IP, with no IP-XACT involved: + +```htcl +## A minimal AXIS interface configurator. +## +## Wraps the underlying create_bd_cell and set_property calls so that +## a consumer can request a configured AXIS slice with a few keyword +## arguments. +proc create_axis_register_slice { + ## Block-design cell name to instantiate at. + cell_name + + ## Width of the data bus in bits. + @enum(8, 16, 32, 64, 128, 256, 512) @default(64) tdata_width + + ## Include byte-strobe sideband. + @enum(0, 1) @default(0) has_tkeep + + ## Width of the optional user sideband; required when -has_tuser is on. + @range(1, 32) @requires(has_tuser) tuser_width + + ## Set when a user sideband is desired. + @enum(0, 1) @default(0) has_tuser + + ## Newer designs should use -has_tuser instead. + @deprecated("use -has_tuser") @default(0) legacy_tuser_mode +} { + create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 $cell_name + set_property -dict [list \ + CONFIG.TDATA_NUM_BYTES [expr {$tdata_width / 8}] \ + CONFIG.HAS_TKEEP $has_tkeep \ + CONFIG.HAS_TUSER $has_tuser \ + CONFIG.TUSER_WIDTH $tuser_width \ + ] [get_bd_cells $cell_name] +} +``` + +A call site, with rationale captured in plain comments: + +```htcl +src @oxide-ip/axis + +# Internal streaming bus between the DMA and the packet classifier.
+# 128-bit because the classifier hits 100 Gb/s line rate; tuser carries +# the classification verdict (5 bits today, room for one more flag). +set dma_to_classifier [ + create_axis_register_slice + -cell_name dma_to_classifier + -tdata_width 128 + -has_tkeep 1 + -has_tuser 1 + -tuser_width 6 +] +``` + +### 2.10 Namespaces — `namespace eval` + +When several procs share a logical prefix (`project::set_*`, +`ip::*`, `log::*`), wrapping them in a `namespace eval` block lets +each member be defined with a short bare name while still being +*called* under the qualified `::` form: + +```htcl +namespace eval project { + ## Set the target HDL language for new sources in a project. + proc set_target_language { + proj + @enum(VHDL, Verilog) language + } { + set_property -name TARGET_LANGUAGE -value $language -objects $proj + } + + ## Set the default library new sources land in. + proc set_default_library { + proj + @default(xil_defaultlib) library + } { + set_property -name DEFAULT_LIB -value $library -objects $proj + } +} + +# At a call site: +project::set_target_language -proj $proj -language VHDL +project::set_default_library -proj $proj +``` + +The analyzer treats each inner `proc` exactly as if it had been +written `proc project::set_target_language { ... } { ... }` at the +top level — same `@enum` / `@default` / `@requires` validation, +same hover, same signature help, same completion. The only +difference is source organization. + +Mechanics: + +- The `name` word can be a multi-segment Tcl namespace + (`namespace eval foo::bar { ... }`); the analyzer uses the + entire name as the prefix. +- `namespace eval` blocks nest. An inner `proc baz` inside + `namespace eval outer { namespace eval inner { ... } }` + registers as `outer::inner::baz`. +- A call from *inside* a namespace body to a sibling member must + still use the qualified name (no automatic same-namespace + resolution in v1). Write `project::helper $x`, not bare + `helper $x`. 
+- Lowering walks namespace bodies recursively, so inner procs get + their attributes stripped and the same `::vw::kwargs` runtime + prelude that top-level procs get. + +## 3. How htcl differs from Tcl + +htcl is a strict superset of the Tcl subset most engineers actually +write — anything you'd type in a Vivado console as a one-off +command parses as htcl. The structural differences come from htcl +adding new constructs and tightening the rules around `proc` +declarations. + +### 3.1 What htcl adds + +| Construct | htcl | Tcl | +|---|---|---| +| Doc comments | `## ...` carry to the next `proc` / proc-arg and feed hover. | Plain `#`; no first-class doc concept. | +| Structured `proc` args | Each arg is a doc-commented, attribute-tagged identifier. | Args are flat names or `{name default}` pairs. | +| Keyword call sites | `create_x -foo a -bar b` | Positional `create_x a b`. | +| Static validation | `@enum`, `@range`, `@requires`, `@conflicts`, etc. checked at parse-time. | None — errors only at runtime. | +| Module imports | `src @dep/file` or `src "rel/path.htcl"`. | `source ./foo.tcl`, with no dependency resolution. | +| Bracket-body line continuation | Newlines inside `[ … ]` are whitespace. | Newline terminates a command unless `\`-escaped. | + +### 3.2 What htcl restricts or interprets differently + +- **`proc` args are structured.** A v1 htcl `proc` cannot declare + its args as `{name default}` pairs or as `args` for varargs the + way pure Tcl can. Every arg is a single bare identifier, + optionally preceded by attributes. Defaults live in + `@default(...)`. +- **Required args come from the absence of `@default`.** Any arg + without `@default` is required. There is no `args` catch-all. +- **Call sites must use keyword form.** When the validator sees a + call to a known proc, positional words other than `-flag value` + pairs are reported as errors. Calls to *unknown* commands + (presumed EDA/Tcl builtins) pass through verbatim with no shape + check. 
+- **Doc comments are semantically significant.** `##` on a `proc` + or proc-arg is consumed by tooling — the LSP for hover, + documentation generators for output — so removing or relocating + one changes observable behavior. Regular `#` comments behave + exactly like Tcl comments. +- **`src` is parsed structurally.** The path word must be a + literal; `src $name` is rejected because the analyzer needs to + follow imports statically. +- **Top-level only for declarations and validated calls.** The + validator builds its signature table from top-level `proc` + declarations. A proc declared inside another proc's body still + parses, but its signature is not used to check call sites. + Likewise, the lowering pass rewrites top-level call sites to + known procs; calls *inside* a proc body are shipped verbatim. + Write your library entry points at the top level. + +### 3.3 What is unchanged + +Everything else is plain Tcl: + +- `$var`, `[cmd]`, `"..."`, `{...}`, backslash escapes. +- `set`, `expr`, `if`, `foreach`, `puts`, `list`, `dict`, … +- The proc body is just Tcl text. Anything you can do in Tcl + works inside a proc body; htcl makes no attempt to constrain it. + +If in doubt, write Tcl. htcl only diverges in service of the +structured proc surface; the body of every command is shipped +through to the backend as written. + +## 4. Checking an htcl program with `vw` + +`vw check` parses, validates, and reports errors and warnings for +one or more `.htcl` files without executing anything. It uses the +same analyzer pipeline the LSP uses, so a clean `vw check` means +the LSP will also be quiet. + +### 4.1 Basic invocation + +```bash +vw check src/cips.htcl +vw check src/lib/*.htcl +``` + +Output on a clean file: + +``` + Checking cips +``` + +Output with errors: + +``` +error: src/cips.htcl:42:23: value 3 for -boot_secondary_pcie_enable is not in @enum. 
Possible values are 0, 1 +error: src/cips.htcl:58:1: missing required argument -cell +src/cips.htcl: 2 error(s), 0 warning(s) +``` + +Each line carries an absolute file path, line, and column, in +`path:line:col: message` format. Spans inside `src`-imported files +are mapped back to their originating file, so an error in an +imported module reports the imported file's path — not the entry +point's. + +### 4.2 What `vw check` enforces + +The validator runs the rules described above: + +- **Parse errors.** Anything that doesn't lex/parse cleanly: + unterminated brace groups, missing values for `-flag` words, + malformed attributes. +- **Proc shape.** Duplicate proc declarations are an error (the + later one wins, matching Tcl's redefine semantics). +- **Call sites against known procs.** + - Unknown `-flag`: `undefined argument -<name>. Possible values + are <list>`. + - Missing value: `argument -<name> is missing a value`. + - Missing required: `missing required argument -<name>`. + - Duplicate flag (warning): `duplicate argument -<name>`. + - `@enum` violation: `value <v> for -<name> is not in @enum. ...`. + - `@range` violation: `value <v> for -<name> is out of @range(...)`. + - `@range` on a non-integer literal: `argument -<name> expects + an integer, found <v>`. + - `@requires` unmet: `argument -<a> requires -<b> to also be + set`. + - `@conflicts` triggered: `argument -<a> conflicts with -<b>`. + - `@deprecated` (warning): `argument -<name> is deprecated[: msg]`. +- **`src` imports.** Unknown dependency, missing file, non-literal + path, and parse errors inside imported files. + +### 4.3 What `vw check` does *not* enforce + +- Variable type, range, or existence inside a proc body — the + body is opaque to the analyzer in v1. +- EDA- or Tcl-builtin call shapes. A call to `set_property` or + `create_bd_cell` passes through unchecked. +- Values that go through `$var` or `[cmd]` substitution. `@enum` + and `@range` only see literal call-site words. +- Module-level public/private. Every top-level proc in every + loaded file is in scope. 
+ +### 4.4 Related `vw` commands + +- `vw run ` — parses, validates, and executes through + the EDA backend. With `--check` it stops after the parse and + reports errors. Useful when you want to confirm a file is + shippable without spinning up the backend. +- `vw analyzer` — the LSP server (stdio). Editors point at it for + completion, hover, signature help, goto, and live error + reporting. The errors are exactly what `vw check` reports. +- `vw ip generate ` — generates an htcl wrapper + from an IP-XACT component. The generated file is itself a fully + valid htcl library; reading one is a fast way to see a real + wrapper's shape. + +### 4.5 Suggested authoring loop + +1. Sketch the proc signature: name the args, attach doc comments, + set `@default` for everything optional, mark `@enum` / + `@range` where the underlying domain is known and exhaustive. +2. Write the body in plain Tcl — `set_property`, `create_bd_cell`, + whatever the backend needs. +3. Run `vw check` to confirm the proc parses cleanly. +4. Add a call site in a separate `.htcl` file and run `vw check` + on that to confirm the validator agrees with the signature. +5. Open the file in an editor with `vw analyzer` configured for + `.htcl` and verify hover and completion behave as expected — + the doc comments you wrote are what consumers will read. +6. Once the surface looks right, run the call site through + `vw run` to see the lowered Tcl actually do something on the + backend. + +## Reference summary + +```text +File := Statement* +Statement := Command | Comment | DocComment | Proc | Src +Command := Word Word* +Word := Bare | Quoted | Braced +Comment := '#' .* NEWLINE +DocComment := '##' .* NEWLINE ; attaches to next proc / proc-arg +Proc := 'proc' Name '{' ArgList '}' '{' Body '}' +ArgList := ArgItem* +ArgItem := DocComment* Attribute* Ident +Attribute := '@' Ident ( '(' Value (',' Value)* ')' )? 
+Value := Integer | String | Ident +Src := 'src' PathWord +PathWord := '@'<dep>'/'<path> | <dir>'/'<file> | '<file>' +``` + +Canonical call-site shape: + +```text +set <var> [<proc> <-flag value>...] +``` + +Attributes recognized by the validator: + +```text +@default(<value>) ; default value, makes arg optional +@enum(<v1>, <v2>, ...) ; allowed literal values +@range(<lo>, <hi>) ; integer range, inclusive +@requires(<other>) ; presence implies -<other> present +@conflicts(<other>) ; presence forbids -<other> +@deprecated[(<msg>)] ; warns at call sites +``` + +Errors and warnings surface through: + +- `vw check <file>` — one-shot CLI. +- `vw run <file> --check` — same checks, no execution. +- `vw analyzer` — same checks, live in the editor. diff --git a/docs/better-stack-trace-coverage.md b/docs/better-stack-trace-coverage.md new file mode 100644 index 0000000..37c85b5 --- /dev/null +++ b/docs/better-stack-trace-coverage.md @@ -0,0 +1,228 @@ +# Better stack-trace coverage via lowered proc-body instrumentation + +## Problem + +Some Vivado warnings and errors arrive with no stack trace because they +bypass every Tcl-level emission path our shim hooks. The canonical case +is `[IP_Flow 19-7090] Invalid parameter '…' provided, Ignoring`, emitted +from inside `set_property`'s C++ property validator. By the time those +bytes reach the worker's PTY reader, Tcl has already returned control to +the C++ caller and the Tcl call stack that produced them is gone — +neither the `puts` override nor the `send_msg_id` override fired. + +Today we handle this by **per-command instrumentation**: a wrap around +`::set_property` in `vw-vivado/shim/vivado-shim.tcl::install_set_property_context` +captures the Tcl call stack via `info frame` just before delegating into +the underlying C++ command, then ferries the captured frames to the +worker through PTY-side `__VW_CTX_*` markers. The worker tags any +Warning/Error chunks arriving while a context is active. + +This works for `set_property`. 
It doesn't work for any other Vivado +command that emits async warnings the same way — `create_bd_cell`, +`connect_bd_intf_net`, `validate_bd_design`, etc. Each one needs its own +wrap. + +## Proposal + +Replace per-command wraps with **universal coverage** by instrumenting +every lowered htcl proc body to maintain an explicit htcl-coordinate +stack in Tcl globals. When a warning arrives via PTY, the worker reads +the current top of that stack as the context. + +This is the moral equivalent of "what if we just fed every statement +through Rust one at a time" — it gives us full visibility into the +runtime call chain without rewriting Tcl's proc dispatch. The data +lives in Tcl globals because that's where execution actually happens, +but we control what goes in and when, and the data is the same htcl +coordinates we'd track if we were stepping each statement from Rust. + +### What the lowerer emits + +`vw-htcl/src/lower.rs::lower_proc_decl` currently emits: + +```tcl +proc configure_cips {args} { ::vw::kwargs $args {…} +…body statements at their source line numbers… +} +``` + +We'd extend it to bracket each body statement with push/pop calls: + +```tcl +proc configure_cips {args} { ::vw::kwargs $args {…} + ::vw::stack push "ip/cips.htcl:14 in ::configure_cips" + + ::vw::stack swap "ip/cips.htcl:17 in ::configure_cips" + + ::vw::stack swap "ip/cips.htcl:23 in ::configure_cips" + + … + ::vw::stack pop +} +``` + +`push` adds a new frame, `swap` replaces the current top in-place (so +we don't grow the stack one entry per statement), `pop` removes it at +proc exit. + +Top-level (non-proc) statements get the same treatment in +`dispatch_eval`'s shipped script. 
+ +### Shim helpers + +```tcl +namespace eval ::vw::stack { + variable frames {} + proc push {frame} { + variable frames + lappend frames $frame + } + proc swap {frame} { + variable frames + if {[llength $frames] > 0} { + lset frames end $frame + } else { + lappend frames $frame + } + } + proc pop {} { + variable frames + set frames [lrange $frames 0 end-1] + } + proc snapshot {} { + variable frames + return $frames + } +} +``` + +The shim's existing `attach_stack_if_message` (puts override path) keeps +using `info frame` — it works fine. For the PTY-bypass path, we replace +the per-command marker wraps with a single hook that emits the snapshot +whenever Vivado is about to do something async. The cleanest version: +emit the snapshot **on every statement boundary**, so the worker always +has the latest context without needing per-command opt-in. + +Concretely, every `stack swap` call also writes the new frame to the +PTY as a marker: + +```tcl +proc swap {frame} { + variable frames + if {[llength $frames] > 0} { + lset frames end $frame + } else { + lappend frames $frame + } + ::vw::emit_pty_ctx_replace $frames +} +``` + +`emit_pty_ctx_replace` writes one `__VW_CTX_BEGIN__` / frames / +`__VW_CTX_READY__` group, replacing whatever the worker currently has +active. No `__VW_CTX_END__` is sent — the context is always "the most +recent statement we entered." It gets replaced on the next statement +and reset when an eval completes (worker clears on `EvalDone`). + +### Worker + +The worker already handles `__VW_CTX_BEGIN__` / `__VW_CTX_FRAME__:` / +`__VW_CTX_READY__` / `__VW_CTX_END__` markers via +`worker.rs::consume_ctx_marker` and tags warnings/errors via +`emit_pty_chunk`. No changes needed there beyond: + +- Treat absence of `__VW_CTX_END__` as "always active until eval ends." + The worker should clear `active_pty_context` on `EvalDone` to avoid a + context from one user submission contaminating the next. 
+- Drop `install_set_property_context` from the shim — universal + coverage subsumes it. + +### Rust-side resolver + +`vw-repl/src/app.rs::resolve_stack_frames` already rewrites +`:N in ::procname` to absolute `(file, line)` via the session's +proc table. No changes needed — the marker frames are already in that +shape. + +## Tradeoffs + +### Pros + +- **Universal coverage.** Any Vivado command emitting async warnings + gets a stack trace, not just `set_property`. We stop playing whack- + a-mole every time a new IP throws a different warning class. +- **Removes per-command wraps.** `install_set_property_context` and + any future siblings (`install_validate_bd_design_context`, etc.) + all go away. +- **Statement-precise.** Currently the tagged frame for an + IP_Flow warning points at the `set_property` call site. Under + this scheme it points at the actual `create_versal_cips` call in + the user's `configure_cips` body, because the most-recent `swap` + captured *that* statement before Tcl dispatched into the wrapper + that eventually called `set_property`. Closer to "what line of my + code is responsible." + +### Cons + +- **Codegen overhead.** Every lowered proc body grows by ~one + `::vw::stack swap` call per statement. For a file like + `vivado-cmd/module.htcl` that's significant but not catastrophic; + the strings are short and Tcl's bytecode compiler handles them + cheaply. For `cpm5/module.htcl` (~880 procs, each with ~5–200 + statements) the lowered text grows by a similar factor — measure + before / after on the `--load` cold-start time to know if it + matters. +- **PTY marker volume.** Each `swap` writes a marker group + (~3 lines) to the PTY. For an eval that fires 1000 statements, + that's 3000 marker lines to filter out on the worker side. + Cheap per line but adds up. Mitigate by only emitting markers when + *about to* call into a typed external — but that gets us back to + per-command opt-in. 
+- **Coupling to Tcl's eval order.** The htcl-coordinate stack only + stays accurate if every statement reaches its `swap` call and + every proc reaches its trailing `pop`. Because `pop` is emitted + as the last statement of the lowered body, a Tcl `error` (or an + early `return`) that leaves the body mid-walk skips it along + with the remaining statements: the aborted proc's frame stays on + the stack as "current" until the next `swap` overwrites it or + `EvalDone` clears it. For errors this only affects the window + between the error and the next event — same scope as the current + per-command wrap. Early `return` is worse, since the caller keeps + running with an extra frame on the stack, so the lowerer would + have to pop on every exit path, not just fall-through. +- **Visible inside `if` / `foreach` bodies?** Control-flow constructs + in htcl are braced Tcl scripts that Tcl evaluates internally — + the lowerer doesn't walk into braced sub-scripts today, so an + `if { … } { call X }` would only emit `swap` for the outer `if`, + not for the inner `call X`. Whether that resolution matters + depends on how often the user wants line-precise info for code + inside an `if`. Could be added later by lowering braced bodies as + scripts too. + +## When to do it + +Open question. The current per-command wrap covers `set_property`, +which empirically catches ~all the IP_Flow validation warnings the +user has hit so far. Universal coverage becomes worth the codegen +overhead when: + +- A second Vivado command starts emitting async warnings we want + traced (an obvious sign: someone adds an + `install_<cmd>_context` proc and we realize it's the third one). +- The `set_property` wrap starts missing cases (e.g. Vivado adds a + property-setter path that bypasses `::set_property`). +- We want statement-precise warning attribution rather than + "warning happened during a `set_property` call in proc X" — i.e. + the difference between `at ip/cips.htcl:69` (the `create_versal_cips` + call) vs `at vivado-cmd/cmd/set_property.htcl:80` (inside the + wrapper that eventually invoked `set_property`). + +Until one of those bites, the per-command wrap is the cheaper bet. +This file is the breadcrumb for when it isn't. 
+ +## References + +- `vw-vivado/shim/vivado-shim.tcl::install_set_property_context` — the + current per-command implementation +- `vw-vivado/src/worker.rs::consume_ctx_marker`, + `vw-vivado/src/worker.rs::emit_pty_chunk` — the marker-consumer side +- `vw-htcl/src/lower.rs::lower_proc_decl` — where the + push/swap/pop emission would go +- `vw-repl/src/app.rs::resolve_stack_frames` — the htcl-coordinate + resolver, unchanged by this proposal diff --git a/docs/htcl-enums.md b/docs/htcl-enums.md new file mode 100644 index 0000000..3a1c7d0 --- /dev/null +++ b/docs/htcl-enums.md @@ -0,0 +1,350 @@ +# Enums (tagged sum types) in htcl + +Enums are htcl's way to model values that can be one of several +distinct shapes. They're tagged unions: every value carries a +variant tag at runtime, the compiler auto-generates the +boilerplate (constructors, repr, accessors), and overloaded +handlers dispatch on the tag with no runtime string introspection +on the compiler side. + +This is the principled answer to "I have a value that's +*sometimes* a scalar and *sometimes* a nested dict" — the +canonical case being Vivado property values, where +`get_property NAME $obj` returns a string but +`get_property CONFIG.PS_PMC_CONFIG $obj` returns an embedded +property dict. + +See also: [htcl-return-types.md](htcl-return-types.md) for the +broader type system, [authoring-htcl-libraries.md](authoring-htcl-libraries.md) +for the surrounding arg/return-type annotation grammar. + +## Syntax + +Declaration: + +``` +enum Property = { + Scalar: string + Nested: dict +} + +type Properties = dict +``` + +The variants block is **brace-wrapped and newline-separated** — +the same shape as `proc {arg1; arg2}`. 
Each variant is +`IDENT (':' TYPE)?`; the payload type is optional, so +empty-payload variants are first-class: + +``` +enum Direction = { + North + South + East + West +} +``` + +Qualified variant types for use in arg annotations: + +``` +proc handle_prop {v: Property::Scalar} string { return "scalar: $v" } +proc handle_prop {v: Property::Nested} string { return "nested children: [llength $v]" } +``` + +The compiler sees that two `handle_prop` procs share a name, that +each first arg is a different variant of `Property`, and +synthesizes a public `handle_prop` dispatcher — no user-written +`proc handle_prop {v: Property} ...` boilerplate. + +## Runtime representation + +Every variant value is a Tcl list: + +- **With payload**: `[list ]` — two elements. + `Property::Scalar "foo"` → `[list Scalar foo]`. +- **Without payload**: `[list ]` — single element. + `Direction::North` → `[list North]`. + +The variant short-name (`Scalar`, not `Property::Scalar`) is +enough for dispatch because the dispatcher already knows which +enum it's switching on. + +## Auto-emitted machinery + +For an enum declaration the compiler emits a single +`namespace eval { … }` block. For +`enum Direction = { North; South: int; East; West }` it looks +roughly like: + +```tcl +namespace eval Direction { + # Constructors — one per variant. + proc North {} { return [list North] } + proc South {v} { return [list South $v] } + proc East {} { return [list East] } + proc West {} { return [list West] } + + # Accessors — explicit unwrap entry points wrappers use when + # bridging to extern:: calls. + proc tag {v} { return [lindex $v 0] } + proc payload {v} { return [lindex $v 1] } + + # repr — switches on tag, calls payload type's repr. Renders + # as `Variant()` for payload variants, bare `Variant` + # for empty ones. + proc repr {v} { … } + + # from / to — identity (exist so generics over enums type-check + # uniformly with newtypes). 
+ proc from {v} { return $v } + proc to {v} { return $v } +} +``` + +**No user-written triplet is required for an enum** (newtypes +require `repr`/`from`/`to`; enums get them auto-generated). If +the user wants custom rendering, they can override the proc +post-hoc — same as any other htcl proc. + +## Bridging to extern (lowering) + +EDA builtins (`extern::create_bd_cell`, `extern::get_property`, +etc.) don't understand tagged tuples — they expect bare Tcl +primitives. Any time an enum value flows into an `extern::` call, +it has to be unwrapped first. + +**v1 policy: explicit unwrap, no auto-lowering.** Wrappers (the +procs that own the `extern::` boundary) explicitly extract the +payload via `::payload`: + +``` +proc vivado_cmd::set_string_prop {obj: bd_cell; name: string; val: Property} unit { + # Property::payload extracts the inner value; if val is + # Scalar("foo"), this yields "foo". The wrapper is responsible + # for knowing this is the right shape — the compiler doesn't + # auto-coerce. + extern::set_property -dict [list $name [Property::payload $val]] -objects $obj +} +``` + +Newtypes don't need unwrap — `bd_cell` IS a string at runtime, +so `extern::foo $cell` already passes the right thing. + +Compiler-side **auto-lowering** (walk the expression tree, find +every enum-typed value being passed to an extern, insert the +unwrap automatically) requires full type inference across +expressions and is out of scope for v1. The explicit +`::payload` form is principled (no magic at call sites) +and gives wrapper authors visibility into where lowering +happens. + +## Lifting from extern + +The other direction — taking an EDA function's raw Tcl return +value and tagging it into a typed enum — is per-function +business. `extern::get_property NAME $obj` returns a scalar; +`extern::get_property CONFIG.PS_PMC_CONFIG $obj` returns an +embedded dict. Whether a given property is one or the other is +**metadata the wrapper queries from the EDA tool** (e.g. 
+`extern::report_property -type`), not a shape-of-string +heuristic. + +**v1 policy: lifting lives in the wrapper, not the compiler.** +Each wrapper that returns an enum decides which variant to +construct. The compiler doesn't try to be smart — there's no +shape-guessing path in compiler-emitted code. + +To avoid every wrapper reinventing the wheel, the +`~/src/htcl/amd/vivado-cmd/lift.htcl` library provides a small +set of reusable helpers: + +``` +# Structural check: is the string a well-formed Tcl list with +# an even length and bare-ident keys? Used by wrappers that +# already have other evidence the value MIGHT be a paired dict +# and need a sanity check — NOT as a primary classifier. +proc lift::looks_like_paired_dict {raw: string} bool { … } + +# Vivado-specific: lift a property value to Property using +# `extern::report_property -type` metadata. The classifier IS +# the heuristic — but it's named, scoped, and called from one +# place instead of being baked into the compiler. +proc lift::vivado_property {obj: bd_cell; name: string; raw: string} Property { … } +``` + +Wrappers compose these. Custom cases write their own lifters — +the helper library is convenience, not a requirement. + +**Future direction**: F# data providers as inspiration for +`vw ip generate`. Given an IP-XACT schema, the generator could +emit not just wrappers but also the per-component tagging logic +— declarative schema in, typed lifting out. Worth investigating +once the v1 enum machinery is in user hands and we see which +lifting patterns actually recur. + +## Overload dispatch + +When two or more procs share a name AND each one's first arg is +declared as a different variant of the same enum, the compiler +treats them as **a single overloaded function** rather than a +duplicate-definition warning. 
+ +``` +proc handle_prop {v: Property::Scalar} string { return $v } +proc handle_prop {v: Property::Nested} string { + set parts [list] + foreach {k val} $v { lappend parts "$k=[handle_prop $val]" } + return [join $parts ", "] +} +``` + +The compiler: + +1. **Verifies exhaustiveness** — every variant of `Property` + must have a handler. Missing variants are a hard error + pointing at the first overload, listing the gaps. +2. **Verifies tail-arg agreement** — every overload must + declare identical args after the dispatched first one + (same names, attributes, type annotations). +3. **Verifies return-type agreement** — every annotated + return must be identical. Mixing annotated and unannotated + is an error. +4. **Renames specializations** to `__handle_prop__Scalar` + and `__handle_prop__Nested` internally. User procs whose + names start with `__` are forbidden — that prefix is + reserved for compiler-emitted names. +5. **Synthesizes a public dispatcher**: + ```tcl + proc handle_prop {v args} { + switch -- [lindex $v 0] { + Scalar { return [__handle_prop__Scalar [lindex $v 1] {*}$args] } + Nested { return [__handle_prop__Nested [lindex $v 1] {*}$args] } + } + } + ``` + The payload is unwrapped before the specialization runs, so + the body of `proc handle_prop {v: Property::Scalar}` sees + `$v` as the bare string — matches Haskell `case` semantics. +6. **Registers a synthetic public signature** in the proc table + under the public name. Specializations register under their + mangled names so analyzer drill-down still finds them. + +### What's NOT allowed + +Two procs sharing a name where the first args aren't both +variants of one enum is a **hard error** ("ad-hoc overloading +not supported"). Examples: + +- `proc foo {x: int}` + `proc foo {x: string}` — different + primitives, no enum to dispatch on. +- `proc foo {x: Property::Scalar}` + `proc foo {x: Color::Red}` + — different enums. 
+- `proc foo {x: Property::Scalar}` + `proc foo {x: Property::Scalar}` + — duplicate variants. + +If you legitimately want a single function that handles +unrelated types, rename one of them or wrap the union in an +enum. + +## Recursive types + +Enums and the types they reference can be mutually recursive: + +``` +enum Property = { + Scalar: string + Nested: Properties +} +type Properties = dict +``` + +`Property` references `Properties`, which references `Property`. +Codegen handles this fine — Tcl resolves proc references at call +time, not parse time, so the order in which the namespaces are +emitted doesn't matter. The validator's type-decl-table +collection runs to completion before per-type checks fire, so +forward references work. + +## Worked example: `util::props` + +The motivating case. Vivado property values are heterogeneous — +some scalars (`NAME cips`), some embedded dicts +(`CONFIG.PS_PMC_CONFIG CLOCK_MODE Custom DESIGN_MODE 1 …`). + +Pre-enum (today): `util::props` returns `dict`. +Embedded dict values render as long single lines that wrap at +the terminal — visually confusing. + +With enums: + +``` +# types.htcl +enum Property = { + Scalar: string + Nested: Properties +} +type Properties = dict + +# lift.htcl — the heuristic lives in a named, scoped place. +proc lift::vivado_property {obj: bd_cell; name: string; raw: string} Property { + set kind [extern::report_property -type $obj $name] + if {$kind eq "bool" || $kind eq "string" || $kind eq "long"} { + return [Property::Scalar $raw] + } + # Composite: recurse through the embedded dict. + set inner [dict create] + foreach {k v} $raw { + dict set inner $k [lift::vivado_property $obj "$name.$k" $v] + } + return [Property::Nested $inner] +} + +# util.htcl — the wrapper just builds the typed result; the +# compiler handles the rendering via the auto-generated +# Property::repr and the monomorphized Properties::repr. 
+proc util::props {object: bd_cell} Properties { + set result [dict create] + foreach name [extern::list_property $object] { + set raw [extern::get_property $name $object] + dict set result $name [lift::vivado_property $object $name $raw] + } + return $result +} +``` + +In the REPL: + +``` +› util::props -object $cips + CLASS Scalar(bd_cell) + NAME Scalar(cips) + CONFIG.PS_PMC_CONFIG Nested( + CLOCK_MODE Scalar(Custom) + DESIGN_MODE Scalar(1) + PCIE_APERTURES_DUAL_ENABLE Scalar(0) + … + ) + … +``` + +— recursive structure rendered with no string-shape heuristics +in the compiler-emitted code. The `Property::repr` switch +dispatches on tag, `Properties::repr` (auto-monomorphized from +`dict`) iterates pairs and recurses. + +## Out of scope for v1 + +- **Compiler-side auto-lowering at extern:: call sites** — would + need cross-expression type inference. Wrappers explicitly + unwrap via `::payload`. +- **Ad-hoc overloading** (procs sharing a name where args aren't + variants of one enum) — hard error; add as a distinct feature + later if needed. +- **Multi-arg dispatch** (Julia-style) — first-arg dispatch only. +- **Generic enums** (`enum Result = Ok: T | Err: E`) — needs + type-parameter machinery; defer. +- **Pattern guards / nested patterns** — single arm per variant. +- **F#-style data-provider generation** for IP-XACT schemas — + declarative schema → typed lifting code is a multi-week + project of its own. Note as future direction. diff --git a/docs/htcl-return-types.md b/docs/htcl-return-types.md new file mode 100644 index 0000000..2bcfd48 --- /dev/null +++ b/docs/htcl-return-types.md @@ -0,0 +1,246 @@ +# Return-type annotations in htcl + +htcl procs may declare a return type in a 4th-word slot between the +args block and the body: + +``` +proc make_widget { @arg(name) ... } widget { + …body… +} +``` + +The annotation is purely additive — procs without it parse and run +identically to today. 
With it, the REPL printer and the analyzer +(hover, signature help) start treating the proc's result as +typed. + +## Syntax + +Three pieces: + +``` +proc NAME { ARGS } TYPE { BODY } +``` + +The args list and body keep their existing shapes (see +[authoring-htcl-libraries.md](authoring-htcl-libraries.md) for +the arg attribute grammar). The new `TYPE` slot is a single htcl +word: + +- A bare identifier: `string`, `int`, `bool`, `unit`, `bd_cell`, + `widget`, … +- A generic with no whitespace: `list`, + `dict`, `list>`. +- A brace-wrapped type when the expression contains spaces: + `{dict}` — the parser strips the outer braces + before type-parsing. + +### Grammar + +``` +Type ::= IDENT ('<' Type (',' Type)* '>')? +``` + +Nested generics work to arbitrary depth. + +For heterogeneous values that can take one of several distinct +shapes — e.g. Vivado property values that are sometimes scalars +and sometimes embedded dicts — use **enums** (tagged sum types). +See [htcl-enums.md](htcl-enums.md) for the full design. + +## Type vocabulary + +### Primitives (built into the compiler) + +| Type | Repr | +| --------- | ------------------------------------------------------- | +| `string` | identity | +| `int` | `[format %d $v]` | +| `bool` | `true` / `false` | +| `unit` | empty string; the REPL suppresses the Result entry | + +`unit` is the "I don't return a meaningful value" type — use it on +side-effecting procs (logging, connecting, configuring) so the +REPL stops trying to render their empty return as output. 
+ +### Newtypes (user-declared) + +Any other type is a newtype, introduced by: + +``` +type NAME = UNDERLYING +``` + +Every newtype declaration **must** be accompanied by three procs +in a namespace matching the type name — the validator rejects the +program otherwise: + +| Proc | Signature | Purpose | +| -------------- | ---------------------------------- | -------------------------------------------- | +| `::repr` | `proc ::repr { v } string { … }` | Render an instance for display. | +| `::from` | `proc ::from { v } { … }` | Validate + lift an underlying value into T. | +| `::to` | `proc ::to { v } { … }` | Extract the underlying value back out of T. | + +The `from` proc is the one place to validate — e.g. reject +strings that don't match the Vivado-path shape `^/[\w/]+$` before +they get treated as a `bd_cell`. + +### Example: Vivado's typed handles + +The whole `bd_*` family lives in +`~/src/htcl/amd/vivado-cmd/types.htcl`: + +``` +type bd_cell = string + +proc bd_cell::repr {v} string { return $v } +proc bd_cell::from {v} bd_cell { + if {![regexp {^/[\w/]+$} $v]} { + error "bd_cell::from: '$v' is not a valid block-design path" + } + return $v +} +proc bd_cell::to {v} string { return $v } +``` + +All `bd_pin`, `bd_intf_pin`, etc. follow the same template. + +### Example: a domain newtype + +A user library can introduce its own types the same way: + +``` +type pcie_lane_count = int + +proc pcie_lane_count::repr {v} string { + return "x$v" ;# render as "x1", "x2", "x4", "x8", "x16" +} + +proc pcie_lane_count::from {v} pcie_lane_count { + if {$v ni {1 2 4 8 16}} { + error "pcie_lane_count must be one of {1 2 4 8 16}, got $v" + } + return $v +} + +proc pcie_lane_count::to {v} int { return $v } +``` + +Now a proc annotated `} pcie_lane_count {` will render `x4` in +the REPL instead of `4`. 
+ +### Generics + +`list` and `dict` work over any composition of primitives +and newtypes — the compiler monomorphizes a `repr` proc per unique +instantiation, dispatching to the user's per-type `::repr` at +element boundaries. + +``` +proc list_of_cells {} list { return [list /a /b /c] } +``` + +The REPL invokes the compiler-generated +`list_bd_cell::repr` on the result, which iterates the list and +joins each element's `bd_cell::repr` rendering with newlines: + +``` +› list_of_cells + /a + /b + /c +``` + +For dicts the rendering is `KEY VAL` pairs, one per line: + +``` +proc props {} dict { … } + +› props -object $cips + CLASS bd_cell + NAME cips + … +``` + +## What the type drives + +| Subsystem | Behavior | +| ---------------------- | ---------------------------------------------------------------------------------------- | +| REPL result printer | Wraps the expression with the type's `repr` proc; `unit` suppresses the Result entry. | +| Analyzer hover | Shows `proc NAME → TYPE` in the hover popup. | +| Analyzer signature help | Appends ` → TYPE` to the signature label. | + +Unannotated procs keep the legacy heuristic formatter as a fallback, +so adopting annotations is gradual — annotate as you go. + +## Argument types + +Arguments use the same vocabulary as return types, declared with +a `: TYPE` suffix on the arg name: + +``` +proc plumb_pin { + ## What to name the external port. + name: string + + ## Identity of the pin to make external. + pin: bd_pin +} unit { + … +} +``` + +Rules: + +- Same grammar as return types — primitives, newtypes, and + generics with arbitrary nesting. +- Compatible with existing attributes (`@default(0) count: int`, + `@enum(Master, Slave) mode: string`). +- Optional. Untyped args still parse — adoption is gradual. 
+ +What the annotation drives: + +- **Validator shape checks.** Newtype `<T>::repr/from/to` procs + get a full shape check: `repr` must take `v: T` and return + `string`; `from` must take `v: <underlying>` and return `T`; + `to` must take `v: T` and return `<underlying>`. Annotations + are *optional* on these procs — unannotated args/returns pass + as "trust the user". Annotated mismatches are a hard error. +- **Analyzer display.** Hover and signature help render the arg + as `-name: TYPE` instead of the bare `-name` form. + +Out of scope for v1 (future work): call-site validation +("you're passing a `string` where a `bd_cell` is expected"), +unions, and inference for unannotated args. + +## Authoring conventions + +- **Annotate as you write.** Same effort as documenting an arg, + same payoff as a TypeScript return-type hint. +- **Prefer specific named newtypes over `string`** for values + that have a well-defined shape (paths, IDs, port names). The + `repr`/`from`/`to` triplet documents the invariant and the `from` + validator catches typos at the boundary. +- **Use `unit` for side-effecting procs.** Anything that calls + `set_property`, `connect_*`, `puts`, or `log::*` is almost + certainly `unit`. The REPL won't bother trying to display + whatever Tcl-internal value falls out. +- **Generics nest freely.** `dict<string, list<bd_cell>>` is fine. + Don't be afraid to be specific. + +## Limitations (v1) + +- **No call-site type checking yet.** Arg and return annotations + drive the `<T>::repr/from/to` shape checks and the analyzer + display described above, but the validator does not yet flag a + call that passes a `string` where a `bd_cell` is expected. + Call-site values are still checked only through the attribute + grammar (`@default`, `@enum`, etc.). +- **No inference.** Unannotated procs are simply untyped; we + don't walk the body to derive a return type from `return`. +- **No union or function types.** Start small; extend the grammar + as need shows up. 
+ +For the implementation, see `vw-htcl/src/repr.rs` (codegen), +`vw-htcl/src/type_parse.rs` (mini-parser), and the design notes +in [the original plan](../docs/plans/return-types.md) if it +survives the cleanup. diff --git a/projects/htcl-project-plan.md b/projects/htcl-project-plan.md new file mode 100644 index 0000000..463d73e --- /dev/null +++ b/projects/htcl-project-plan.md @@ -0,0 +1,1736 @@ +# vw: extending for HDL workflow scripting + +## Audience and intent + +This document is a working plan for extending [`vw`](https://github.com/oxidecomputer/vw) +with first-class support for HDL workflow scripting: a structured TCL dialect +("htcl"), a workflow-aware analyzer (LSP), an interactive REPL, and a +Vivado-driving executor. The audience is Claude Code working with the +author. Treat it as a living spec — open questions are called out +explicitly; close them with the author before locking in design decisions. + +## Goal + +The underlying purpose of this work is **complexity management for HDL +designs**: making IP configuration and workflow scripting first-class +source-controlled artifacts that engineers can read, write, review, and +evolve over years. See "Strategic context" below for the full framing. +The concrete capabilities the project adds to `vw`: + +1. **Provide a best-in-class interactive experience for HDL workflow + code, in both the editor and a REPL.** This is the primary goal. + Completion, hover, diagnostics, and navigation should match what a + Rust or TypeScript developer expects from their IDE — and the same + capabilities should be available in an interactive shell that + replaces Vivado's TCL console. Both surfaces consume the same + analysis backend (`vw-htcl`), so a feature built for one is + available to the other for free. 
Every other language-design + decision in this document is partly in service of this goal — the + parser, the proc grammar, the module system, and the reuse of + `vw`'s dependency resolver all exist in forms designed to be + statically analyzable. +2. **Establish a unified multi-language LSP for the HDL workflow.** + `vw analyzer` is designed from day one as a multi-language language + server. htcl is the first language wired up (and the focus of v1); + VHDL is the planned second, initially via a `vhdl_ls` proxy and + eventually via direct integration with Oxide's developing VHDL + frontend. The architecture (a `LanguageBackend` abstraction and + per-file dispatch) is in place from the initial analyzer phase even + while only htcl is wired up. See "LSP design" for the full + treatment. +3. Provide an ergonomic dialect of TCL ("htcl") for HDL workflow + scripting, with first-class support for structured proc + declarations, modules, and dependencies resolved via `vw`. This + dialect is not typed in v1; the structural improvements + (per-argument doc comments, attributes like `@default` / `@enum` / + `@required`, real imports) deliver most of the value without + committing to a type system before we know what shape it should + take. +4. Execute htcl by talking to Vivado's built-in TCL interpreter over a + pipe, with a thin TCL shim on the Vivado side. +5. Stay vendor-aware: Vivado first, but the architecture should + accommodate Quartus and other backends later. + +This is an alternative to `set_property -dict {...}` bag-of-strings IP +config, ad-hoc `source [file join $::ROOT ...]` module loading, and +Vivado's generally unpleasant interpreter as a development environment. + +## Strategic context: complexity management + +The underlying problem this project addresses is that today's IP +integration workflow is intrinsically lossy with respect to source +control. 
Integrating IP into a design currently means reading user +guides and architecture manuals, then mapping what's learned into +GUI-based configuration in Vivado. The connection between official +documentation and GUI configuration is ambiguous and — critically — +not reproducible from an engineering-process perspective. The +artifacts that end up in source control (TCL block design exports, +generated wrappers, project files) don't capture how those +configurations were created or why specific parameterizations were +chosen. There's nowhere in the workflow to record rationale, no way to +review configuration changes the way code is reviewed, and no +mechanism to evolve a design over years and remember why it looks the +way it does. + +htcl is a complexity-management tool first, and a TCL replacement +second. Configuration becomes textual source code: reviewable, +diffable, doc-commentable, version-controlled, and analyzed by +tooling. Rationale lives next to the configuration it explains. The +artifact in source control is the authoritative record of *what was +chosen and why*, not a lossy projection of decisions made in a GUI. + +### Conceptual layering: specification, interface, instantiation + +These are three distinct things. Confusing them leads to bad design +decisions, so they're named explicitly here: + +**IP specification (IP-XACT).** Describes what an IP *is* — its full +parameter space, the parameterized port set, the parameterized memory +map, and the relationships between configuration choices and the +resulting structure. The specification is static; it covers all valid +configurations. IP-XACT is the format vendors use for this. + +**Configuration interface (htcl).** A means to invoke an IP at a +specific configuration, with human ergonomics. An htcl wrapper is a +proc whose arguments are the IP's parameters and whose body emits the +underlying `create_ip` / `set_property` calls. 
The htcl proc *is not* +a description of the IP — it's a means to pick a configuration and +record the rationale for that pick. Different layer entirely from +IP-XACT. + +**Instantiation (RTL + memory map).** What you get when you run a +configuration interface at chosen parameter values: a specific VHDL +entity (the wrapper Vivado generates) and a specific memory map (RSF, +in our case). These are the concrete artifacts at one configuration. + +The lifecycle is: + +1. *Specification* (IP-XACT, authored by the IP vendor): all valid + configurations and their resulting structure. +2. *Configuration interface* (htcl wrapper, generated from IP-XACT by a + sideband tool): the ergonomic surface for picking a configuration. +3. *Configuration choice* (htcl call site, hand-written by the + engineer): specific parameter values, with doc comments capturing + rationale, source-controlled and reviewed. +4. *Instantiation* (generated VHDL + RSF, produced by running htcl + through Vivado): the actual artifacts at the chosen configuration. + +htcl sits squarely at layer 2, with call sites at layer 3. It does +*not* attempt to subsume IP-XACT (layer 1) or replace generated RTL +and RSF (layer 4). The value of htcl is in giving layer 3 a +first-class, source-controlled, tool-analyzable form. + +### The Vivado-team pitch + +There is an active conversation with the Vivado team about Xilinx +publishing htcl configuration interfaces alongside the IP-XACT they +already publish. The pitch: + +> IP-XACT is your specification format and stays the source of truth. +> What's missing is a published *configuration interface* — a layer +> where engineers can record what configuration they chose and why, in +> a form that is source-controllable, reviewable, and analyzable by +> tooling. Today engineers do this in GUIs, and the resulting TCL +> dumps don't capture intent. htcl wrappers, generated from your +> IP-XACT, fill that gap. 
You keep the specification; we get a +> rationale-preserving configuration layer that bridges to your +> existing pipelines unchanged. + +This is a smaller, more defensible pitch than "replace IP-XACT with +htcl." htcl complements IP-XACT; it doesn't compete with it. The +showcase that earns the conversation is a set of generated htcl +wrappers that a Vivado engineer would be happy to publish — ergonomic, +documented, idiomatic. + +### IP as distributed packages + +IP configuration interfaces live in packages that vw resolves as +ordinary dependencies. A Xilinx-published `xilinx-ip` package +(generated from Xilinx's IP-XACT) contains `.htcl` wrappers for each +IP. A custom-IP repository at Oxide ships its own htcl wrappers, +generated from its own IP-XACT. A third party publishes wrappers for +their IP. Consumers add the relevant package to `vw.toml` and `src +@xilinx-ip/axis_register_slice` works the same as any other import. + +There is no IP database, no central registry, no special-case +infrastructure. Repositories of htcl distributed through vw +dependencies *are* the catalog of available configuration interfaces, +decentralized by construction. This matches how the Rust crate +ecosystem works and how Oxide's existing vw-managed VHDL dependencies +work. + +### Scope of htcl as a configuration interface + +htcl describes how to *invoke* an IP. It does not describe the IP's +ports, the IP's memory map, or the IP's parameterized structure — +those are IP-XACT's job, and the artifacts at any specific +configuration come out the other side as generated RTL and RSF. + +What an htcl wrapper proc declares: + +- Parameter names, types, defaults, constraints, and inter-parameter + dependencies — the configuration interface itself. +- Doc comments on each parameter (sourced from IP-XACT descriptions + when generated). +- Doc comments on the wrapper as a whole. 
+ +What an htcl wrapper proc *emits* (in its body): + +- `create_ip` and `set_property` calls that hand the configuration + choice to Vivado. +- Optionally, directives that influence Vivado's wrapper generation + (see "Wrapper documentation" below). + +What an htcl wrapper proc does *not* contain: + +- Port lists. The ports of any specific instantiation come from the + generated VHDL/Verilog wrapper. The space of possible ports across + all configurations is in the IP-XACT specification. +- Memory maps. The register interface of any specific instantiation + comes from RSF generated by an IP-XACT-aware pipeline (see "RSF + generation" below). The space of possible memory maps is in the + IP-XACT specification. + +This scope is the point: htcl is small, focused on the human-ergonomic +configuration layer, and stays out of the description and +instantiation layers where IP-XACT and generated artifacts already +serve well. + +### RSF generation + +Software needs to know the register map of any IP it talks to. RSF is +Oxide's register-spec format and the natural target for this +information. + +The RSF for a specific IP instantiation is a function of two inputs: +the IP-XACT specification (which describes the parameterized memory +map) and the chosen configuration values (which pin the parameters). +The pipeline is: + +``` +IP-XACT spec + chosen parameter values --> RSF for this instance +``` + +This pipeline is not part of htcl, vw, the analyzer, or the REPL. It +is its own tool (call it `ipxact2rsf` or whatever the team names it) +that reads the IP-XACT spec, takes the chosen configuration values +(extracted from the htcl call site, or passed in directly, or queried +from Vivado post-instantiation), and emits RSF. + +What htcl contributes to this pipeline: it is the place where the +configuration values are pinned down in a source-controlled form. 
The +RSF generator can extract those values by reading the htcl call site, +by running the htcl through to Vivado and querying the instantiated +IP, or by both. The mechanics are for that tool to decide; htcl just +needs to ensure the configuration is recoverable. + +### Wrapper documentation + +Vivado's generated VHDL/Verilog wrappers around configured IP are +notoriously undocumented. Port semantics, parameter effects, and +intended usage patterns are absent from the wrapper file. This is a +real obstacle to using IP correctly and reviewing changes to its +configuration. + +The information flow we want: + +``` +htcl wrapper proc (doc comments on parameters, derived from IP-XACT) + --> create_ip with directives capturing those docs + --> Vivado wrapper generator (would need to honor the directives) + --> generated VHDL with carry-through documentation +``` + +The first step we control (htcl carries the docs). The last step we +want (Vivado emits documented wrappers). The middle step is the open +question: what mechanism gets the doc strings from `create_ip` +arguments into Vivado's wrapper-generation output? Possibilities +include TCL directives Vivado already supports (likely insufficient), +TCL directives Xilinx would need to add (this is what we'd pitch), or +post-processing of generated wrappers by a separate tool (works but +fragile). + +Worth pitching to Xilinx alongside htcl adoption: a documented +mechanism for `create_ip` to accept doc strings that flow into the +generated wrapper. Independently useful even for users who never adopt +htcl. + +## Why this lives in `vw` + +`vw` is already structured library-first: `vw-lib` is the core, the `vw` +CLI is a thin clap-based shim, and `vw-lib` is already consumed by +external tools (e.g., the remote build service for Vivado projects). 
This +is the same architectural pattern as Cargo's relationship to +rust-analyzer: one library underneath, thin CLIs on top, all tools sharing +the manifest, lockfile, and resolved dependency graph. + +Adding htcl-language support, an analyzer, and a REPL as additional +subcommands of `vw` (or as sibling binaries that share `vw-lib`) gives us: + +- **One manifest, one lockfile, one cache.** `vw.toml` and `vw.lock` + cover VHDL and htcl dependencies uniformly. Packages can ship both + languages from the same git source, versioned together (as + discussed in earlier design conversations: a package version is a + single value across all languages it contains). +- **One LSP serving both languages.** `vw analyzer` is designed from + day one as a multi-language language server. htcl is wired up + natively from the initial analyzer phase; VHDL arrives in a + subsequent phase via a `vhdl_ls` proxy, with eventual replacement + by Oxide's developing VHDL frontend. The user-facing surface (one + LSP per workspace) is stable across that transition. See "LSP + design" for the architecture. +- **Cross-language analysis.** An htcl file that wraps a user VHDL + entity and the VHDL file defining that entity live in the same + workspace, analyzed by the same tool through a shared backend + abstraction. Go-to-definition can cross language boundaries. +- **Shared dependency resolution.** No new resolver, no new fetch + logic, no new cache layout. We add an htcl file-selection layer on + top of `vw-lib`'s existing dependency model, but the resolution + mechanism is unchanged. +- **One mental model for users.** They learn `vw` once and get + everything. + +The model: `vw analyzer` is the LSP (modeled on `rust-analyzer`'s +relationship to Cargo). `vw repl` is the interactive htcl shell. `vw run` +(or similar) executes an htcl script against a Vivado worker. All of +these are thin subcommands over `vw-lib` plus a new `vw-htcl` crate that +holds the htcl-specific analysis. 
+ +## Architecture overview + +``` + +-----------------------------+ + | vw-lib (existing) | + | - manifest / lockfile | + | - dependency resolver | + | - cache management | + | - VHDL file selection | + +--------------+--------------+ + | + +---------------------------+---------------------------+ + | | | + v v v + +----------+-----------+ +-----------+----------+ +-----------+----------+ + | vw CLI (existing) | | vw-htcl (new) | | other vw-lib users | + | vw add / update / | | - htcl parser | | (remote build svc, | + | test / etc. | | - module resolver | | future tools) | + +----------------------+ | - signature check | +----------------------+ + | - htcl -> Vivado | + | TCL emission | + +-----------+----------+ + | + +--------------------------------+--------------------------------+ + | | | + v v v + +---------+----------+ +------------+---------+ +------------+---------+ + | vw analyzer (new) | | vw repl (new) | | vw run (new) | + | LSP server, serves | | interactive htcl | | execute htcl script | + | VHDL + htcl from | | against Vivado | | against Vivado | + | one process | | worker | | worker | + +--------------------+ +----------+-----------+ +----------+-----------+ + | | + +--------------+-----------------+ + | + v wire protocol (newline- + delimited commands + + structured responses) + | + +-------------+--------------+ + | Vivado process | + | (long-lived) | + | - vivado-shim.tcl | + | (small TCL layer that | + | wraps commands and | + | emits JSON for | + | structured results) | + +----------------------------+ +``` + +Key invariants: + +- `vw-lib` is unchanged in spirit; we add new crates beside it + (`vw-htcl`, `vw-analyzer`, `vw-repl`, etc.) rather than restructuring + what exists. +- All language semantics live in Rust. The Vivado shim is a dispatcher + and serializer, nothing more. +- The wire protocol is newline-delimited commands in, structured (mixed + text/JSON) responses out. Hand-written; not RPC-framework-heavy. 
+- Vivado runs as a long-lived worker process. Cold-start is too expensive + for the hot path. +- The Vivado-driving binaries are self-contained; no Vivado-specific + linking. Distribution is one (or a few) Rust binaries plus the shim TCL + file. + +## Language design + +### Naming + +Working name for the dialect: `htcl`. This is the language name, used as +the file extension (`.htcl`) and as the LSP language identifier. It is not +a separate tool — htcl is a thing `vw` supports, alongside VHDL. + +Open question: final name. Avoid `tcl` in the name to reduce confusion +about it being a TCL implementation; it isn't. + +### Relationship to TCL + +htcl is **not** a TCL superset in the way TypeScript is a JS superset. +Existing Vivado TCL files are not valid htcl. The reasons: + +- We want a structured proc-argument grammar that vanilla TCL can't parse. +- We want static module imports (`src` / `use`) that aren't `source`. +- We want to reject TCL features that defeat static analysis (`upvar`, + `uplevel`, `trace`, dynamic command rewriting). + +However, htcl emits TCL when talking to Vivado, and users can drop down to raw +TCL via an escape hatch for things htcl doesn't model. Existing TCL scripts can +be `source`d through the shim if needed. + +### Proc grammar + +Procs declare structured arguments with per-argument doc comments and attributes: + +```htcl +proc axis_interface { + ## Add a TKEEP sideband signal. Indicates valid bytes in the beat. + @default(0) + has_tkeep + + ## Add a TLAST sideband signal. Indicates the last beat in a packet. + @default(1) + has_tlast + + ## Width of TDATA in bytes. + @default(8) + @enum(1, 2, 4, 8, 16, 32, 64, 128) + tdata_num_bytes + + ## Width of TUSER in bits. Only meaningful if has_tuser is set. 
+ @default(0) + @requires(has_tuser) + tuser_width +} { + # body: emits CONFIG.* set_property calls +} +``` + +Call site uses keyword arguments: + +```htcl +set lrq_request [axis_interface -has_tkeep 1 -tdata_num_bytes 128] +``` + +Attribute set for v1 (extensible): + +- `@default(value)` — value if omitted +- `@required` — error if omitted +- `@enum(a, b, c)` — value must be one of these +- `@range(min, max)` — numeric bounds +- `@requires(other_arg)` — dependency between args +- `@conflicts(other_arg)` — mutual exclusion +- `@deprecated("message")` — soft warning + +Open question: should attributes go before or after the doc comment, or be +order-insensitive? Recommend: doc comments first, then attributes in any order, +then the argument name. Matches Rust/TypeScript conventions. + +### Module system + +Replace `source` with `src` (or `use` — see open questions): + +```htcl +src common/project # relative to current file's directory +src /opt/xilinx/lib/foo # filesystem-absolute (use sparingly, non-portable) +src @quartz/ip/bacd # named dependency, resolved via vw.toml + vw.lock +``` + +Resolution rules: + +- Leading identifier: relative to the directory of the importing file. + Subdirectory traversal allowed (`src ip/cips` is fine). +- Leading `/`: filesystem-absolute. Permitted but discouraged; lint warning + in v1, since these break across machines. +- Leading `@name/`: resolved via `vw.toml`'s `[dependencies.name]` entry. + The cached path comes from `vw-lib`'s resolution (which reads `vw.lock` + and the cache layout under `~/.vw/deps/`). The `@name` prefix means the + same thing as it does to VHDL consumers of vw — same dependency entry, + same commit, same cache directory. +- No upward traversal (`../`) in v1. Force cross-tree references to be + absolute (filesystem or named). + +Extension is implicit: `src foo/bar` resolves to `foo/bar.htcl` (or +whatever extension we settle on). Exactly one extension is recognized; +ambiguity is an error. 
+ +Idempotence: a module is loaded at most once per interpreter run, keyed by +canonical (realpath'd) file path. Repeated `src` calls are no-ops. + +The "project root" — the base for filesystem-absolute imports' meaning of +the manifest, and the directory the analyzer walks for workspace symbols — +is the directory containing `vw.toml`. Same convention as for VHDL. + +Open question: namespace semantics. Does `src foo/bar` populate +`foo::bar::*`, or do top-level definitions land in the global namespace as +they would with `source`? Recommend: top-level definitions are scoped to +the module's namespace by default, with an explicit `export` list +controlling what's visible. Importers use bare names after `src` (the +exports of the module are pulled into the importer's namespace). This is +the bigger semantic change; if too invasive for v1, fall back to +global-namespace semantics and add scoping later. + +### Types — defer until we have usage experience + +Types are out of scope for v1, and we should be in no hurry to add them. The +proc grammar, module system, and dependency manager are the highest-value +features and don't require a type system to be useful. We need real usage +experience with htcl-without-types before we can design types that pay for +themselves. + +The risk of adding types prematurely is well-attested across other +ecosystems: type systems designed before the language has settled tend to +encode the wrong abstractions, become hard to evolve, and impose annotation +overhead that the underlying use cases don't justify. Better to live without +them, collect concrete cases where they would have caught real bugs or +documented intent better, and design to those cases later. + +If types do land eventually, they will be: + +- Optional and gradual. Unannotated code keeps working.
+- Focused on HDL-specific concerns where the leverage is clear (likely + candidates: IP handle types so CONFIG.* completion works without flow + analysis, units like Hz/MHz/ns, bit widths). General-purpose typing of + TCL values is not the goal. +- Driven by accumulated evidence, not speculative design. + +The LSP's type-directed CONFIG.* completion (see LSP design section) is +achieved in v1 via flow tracking of IP handles, not a user-facing type +system. The user writes `set lrq [create_ip -name axis_register_slice ...]` +with no annotations; the analyzer infers that `$lrq` holds an +`axis_register_slice` handle. This is a narrow, internal analysis — not a +type system users interact with — and it covers the headline IP-completion +case without committing to broader type design. + +## Dependency management + +Dependency management is `vw-lib`. We do not design a new manifest, a new +lockfile, a new resolver, or a new cache. We extend the existing model in +the minimal ways htcl needs. + +### Manifest and lockfile + +`vw.toml` and `vw.lock`, as they exist today. Reference: +[vw README](https://github.com/oxidecomputer/vw/blob/main/README.md). + +Each dependency entry already specifies `repo` + `branch`/`commit`/`tag` +and a `src` selector for VHDL files. For htcl consumption, we add an +optional `htcl` selector alongside `src`: + +```toml +[dependencies.quartz] +repo = "https://github.com/oxidecomputer/quartz" +branch = "main" +src = "hdl/ip/vhd" # VHDL files (existing; consumed by VHDL flow) +htcl = "hdl/ip/htcl" # htcl files (new; consumed by vw-htcl) +recursive = true +``` + +Either or both selectors may be present. A package that ships only htcl +omits `src`; a package that ships only VHDL omits `htcl`; a package that +ships both (the common case for shared IP wrappers) has both. + +Open question: whether to keep them as separate keys (`src` / `htcl`) or +generalize to a polymorphic selector keyed by file type. 
The separate-keys +version is the smallest change to vw-lib and the most explicit; keep it +unless there's a reason to generalize. If we later add other languages +(SystemVerilog, Quartus TCL), we add more keys. + +### Versioning model (unchanged from vw) + +- A package version is a single value across all languages it contains. + VHDL and htcl contents of `quartz` ship together at the same commit; no + per-language versions. +- `vw.lock` pins exact commit SHAs, not tags. + +This was previously called out as a design decision; it is, but it's +already the model vw uses, so there's nothing to design. + +### Resolution and fetch (unchanged from vw) + +- `vw update` fetches and locks. Same command, same semantics, now also + resolves htcl dependencies if `htcl` selectors are present. +- Cache at `~/.vw/deps/<name>-<commit>/`, unchanged. +- Authentication defers to git, unchanged. + +### What `vw-htcl` does on top of `vw-lib` + +A thin file-selection and module-resolution layer: + +- Given a resolved dependency from `vw-lib`, find the htcl files within + it using the `htcl` selector. Same selector semantics as the existing + `src` field (directory / single file / glob). +- Build an index from `@name` to the resolved dependency's htcl root + directory. +- The htcl module resolver consults this index when it sees `src + @name/path`. + +This is the only dependency-related code htcl needs to write. Everything +upstream of "I have a cache directory for `@quartz`" is already done. + +### Path dependencies + +`vw` currently supports git-sourced dependencies. For monorepo/sibling- +checkout development loops, htcl's plan called for `path = "../foo"` +dependencies. Confirm whether `vw-lib` currently supports this; if not, +adding it is a small extension that benefits both VHDL and htcl +consumers. Likely worth doing in `vw-lib` itself rather than as +htcl-specific behavior.
+ +### Coordination questions for `vw-lib` + +Things to confirm before phase 1 implementation, and possibly upstream +changes to schedule: + +1. **Does `vw-lib` expose a stable Rust API for "give me the resolved + path for dependency X"?** Yes (per the existing remote-build-service + consumer), but confirm the exact shape — a `Resolved { name, commit, + path, src_files }` struct, or similar — so the htcl resolver can + consume it cleanly. +2. **Is the `src`-selector logic factored well enough to reuse for `htcl` + selectors?** Ideally yes; the directory / single-file / glob handling + is generic and shouldn't be reimplemented. +3. **Path dependencies:** present, absent, or in progress? +4. **Workspace concept:** if `vw.toml` ever grows multi-package workspace + support (cargo-style), how does that interact with htcl? Likely fine, + but worth a thought. + +## Wire protocol + +### Transport + +Vivado is spawned once per workflow run (or per LSP session) and stays alive. +htcl talks to it over stdin/stdout pipes. No sockets, no daemon, no +multi-user complexity in v1. + +### Request format + +Newline-delimited commands. Each command is a JSON object: + +```json +{"id": 42, "op": "eval", "tcl": "set_property -dict {CONFIG.HAS_TKEEP 1} $lrq"} +{"id": 43, "op": "eval_structured", "tcl": "report_property -all $cell"} +``` + +`id` is a monotonic request ID for matching responses. + +Two ops in v1: + +- `eval`: run the TCL, return the result as a string (or error info). +- `eval_structured`: run the TCL through a wrapper that emits JSON for known- + structured commands. The shim has a dispatch table from command name to + wrapper function. + +### Response format + +```json +{"id": 42, "ok": true, "result": ""} +{"id": 43, "ok": true, "result": {"CONFIG.HAS_TKEEP": "1", ...}} +{"id": 44, "ok": false, "error": {"message": "...", "code": "...", "info": "..."}} +``` + +For `eval_structured`, `result` is the JSON shape produced by the per-command +wrapper. 
For `eval`, it's a string. + +### Vivado-side shim + +A small TCL file (`vivado-shim.tcl`) loaded into Vivado at worker startup. It: + +- Reads newline-delimited JSON from stdin. +- Dispatches to a handler per `op`. +- For `eval`, calls `uplevel #0 $tcl`, captures result or error, emits + response. +- For `eval_structured`, parses the command name from the TCL, looks up a + wrapper, runs the wrapper to produce JSON. +- Wrappers are hand-written per command family. Start with: `report_property`, + `report_timing` (summary), `get_cells` / `get_pins` / `get_nets` / + `get_clocks` lists, `list_property`. Grow as needed. + +Most commands don't need a structured wrapper. Default to passthrough as a +string; opt into structure for the commands where text parsing on the Rust +side would be painful. + +### Batching and fencing + +v1: one request, one response. No batching. If round-trip latency becomes a +measured bottleneck, add a `batch` op that runs a list of commands with +fence semantics (stop on first error, or run-all-collect-errors). + +## LSP design + +LSP is a first-class concern, not a feature bolted on late. This +section describes the design in detail because LSP quality is the primary +goal of the project, and because the rest of the language design either +serves the LSP or constrains it. + +### Scope: a multi-language LSP, htcl first, VHDL next + +`vw analyzer` is designed from the start as a multi-language LSP for +the entire HDL workflow, not as an htcl-only LSP that might grow VHDL +later. The bulk of the initial implementation focus is htcl — that's +where the new language design and the headline complexity-management +features live — but the LSP server architecture, the workspace model, +and the configuration shape all assume a multi-language future and +must accommodate it from day one. + +The motivating reality: + +- Oxide's HDL work spans both htcl (this project) and VHDL (extensive + existing codebase, the larger of the two by volume). 
+- Today, VHDL editor support comes from `vhdl_ls`, the open-source + VHDL language server, configured via `vhdl_ls.toml` that `vw` + generates. This works well and isn't going anywhere short-term. +- Long-term, Oxide is developing its own VHDL frontend and synthesizer + as part of a complete VHDL stack. The eventual goal is for `vw + analyzer` to integrate directly with that frontend, replacing + `vhdl_ls`. + +The path: `vw analyzer` serves htcl natively from day one, and +provides VHDL support initially by proxying to `vhdl_ls`, eventually +by integrating with the Oxide VHDL frontend. The user-facing surface +(one LSP serving both languages from a unified `vw.toml` workspace) +is stable across that transition; the implementation under the hood +changes. + +Two consequences for the htcl-focused work in this plan: + +1. The LSP server architecture is multi-language from phase 3, even + while only htcl is wired up. The language-backend abstraction + exists; it has exactly one implementation initially. +2. The htcl-side analysis must surface cross-language queries through + that abstraction rather than directly to a specific VHDL + implementation. This keeps the abstraction honest from day one and + means the eventual swap from `vhdl_ls` to the Oxide frontend + doesn't ripple into htcl-side code. + +### Architectural principle: one source of truth per language + +The LSP is **not** a separate analyzer with its own parser and signature +checker. It is the same code as the CLI, exposed over the LSP protocol. +For htcl, the parser, name resolver, signature checker, and +(eventually) any type analysis are written once and used by both `vw +run` / `vw check` and `vw analyzer`. For VHDL (later), the same +discipline applies via whichever backend serves VHDL at the time. + +This matters because the dominant failure mode of language tooling is +divergence between "what the compiler does" and "what the IDE shows." 
+The moment they're separate implementations, they drift, and users +learn not to trust the IDE. Sharing the implementation is the only +durable fix. + +Concretely: + +- All htcl semantic analysis lives in the `vw-htcl` crate, consumed by + every subcommand that needs it (`vw run`, `vw check`, `vw analyzer`, + `vw repl`). +- VHDL analysis lives behind a language-backend abstraction (see + below). Initially: `vhdl_ls` proxy. Later: direct integration with + the Oxide VHDL frontend. htcl-side code talks to this abstraction + for cross-language queries, never to a specific VHDL implementation. +- `vw analyzer` is the LSP server process. It does protocol plumbing + (LSP request/response, text-sync, capability negotiation), dispatch + by file type, and cross-language coordination. It contains minimal + language logic of its own. +- `vw check` runs the same analyses the LSP runs on save, with the + same diagnostics. CI uses `vw check`; editors use `vw analyzer`. + Same diagnostics either way. + +### Language backend abstraction + +The LSP server dispatches per-file based on extension and routes +requests to a language backend. Each backend implements a common +trait (working name `LanguageBackend`): + +```rust +trait LanguageBackend { + fn diagnostics(&self, file: FileId) -> Vec<Diagnostic>; + fn hover(&self, file: FileId, pos: Position) -> Option<Hover>; + fn completion(&self, file: FileId, pos: Position) -> Vec<CompletionItem>; + fn definition(&self, file: FileId, pos: Position) -> Vec<Location>; + fn document_symbols(&self, file: FileId) -> Vec<DocumentSymbol>; + // ... cross-language query surface, see below ... + fn find_symbol(&self, query: SymbolQuery) -> Vec<SymbolInformation>; +} +``` + +(Exact shape is for implementation to decide; the point is the +abstraction exists from day one.) + +Initial implementations: + +- `HtclBackend` — uses `vw-htcl` directly. Native, in-process. +- `VhdlBackend` (initial) — `vhdl_ls` proxy. Spawns `vhdl_ls` as a + subprocess and forwards file-scoped requests over the standard LSP + protocol.
The proxy generates `vhdl_ls.toml` from `vw.toml` (which + `vw` already does for standalone editor support) and points the + subprocess at it. + +Later, the `VhdlBackend` is replaced by a direct integration with the +Oxide VHDL frontend — same trait, different implementation. Call +sites in `vw analyzer` and in `HtclBackend` don't change. + +The cross-language query surface (`find_symbol` above, plus whatever +else accumulates as cross-language features grow) is the contract +that lets htcl ask "is there a VHDL entity named X?" without caring +which backend answers. For the `vhdl_ls` proxy, `find_symbol` is +implemented by querying `vhdl_ls` with `workspace/symbol` and +translating the results. For the Oxide frontend, `find_symbol` is a +direct API call. Same shape from the htcl side. + +### Cross-language analysis (htcl ↔ VHDL) + +A different case from the IP-XACT-generated wrapper flow described in +Strategic Context: hand-written htcl that wraps user VHDL entities. A +team's own VHDL design has entities with generics, and an htcl proc +gives those entities ergonomic instantiation interfaces with doc +comments, defaults, and validation. Because `vw analyzer` sees both +languages (through the backend abstraction), it can offer +cross-language navigation between the htcl wrapper and the underlying +VHDL entity: + +- **Go-to-definition from htcl into VHDL.** An htcl proc that wraps a + VHDL entity — e.g., `instantiate_uart` taking parameters that map + to the `uart` entity's generics — can declare its target entity via + an attribute (likely `@vhdl_entity(uart)`). When the user invokes + go-to-definition on the proc call, the LSP server asks the VHDL + backend for the location of entity `uart` and returns it. The + htcl-side code that issues this query doesn't know whether + `vhdl_ls` or the Oxide frontend answered. 
+- **Find references across languages.** "Find references" on a VHDL + entity surfaces both VHDL instantiations (from the VHDL backend's + references query) and htcl wrapper procs that target it (from the + htcl backend's index of `@vhdl_entity` attributes). +- **Generic-to-argument mapping.** If an htcl wrapper declares which + of its proc arguments map to which VHDL generics, the htcl backend + queries the VHDL backend for the entity's generic list and checks + for missing or extra mappings. Warns on drift when the entity's + generic list changes. + +Note: this is distinct from IP-XACT-generated wrappers. Those target +Vivado IP via `create_ip` and don't have a VHDL entity in the +workspace to navigate to — the entity comes out the other side as +generated RTL. Cross-language analysis applies to user-authored +htcl-over-VHDL, not to vendor-IP wrappers. + +Open question: how aggressively to pursue cross-language features in +v1. The minimum is "go-to-definition from htcl into VHDL"; the rest +is nice to have. I'd recommend the minimum lands in v1 (it's the +headline demo for the multi-language model) and the more +sophisticated checks come after. + +### Incremental analysis + +A useful LSP must re-analyze on every keystroke. The analysis layer is +designed for this from the start, not retrofitted. + +Approach: + +- The unit of caching is the file (module). Parsing a file produces a syntax + tree; resolving its imports produces a module-level binding. Files cache + their parsed and resolved state, keyed by content hash. +- Cross-module analysis (resolving an import, looking up an external proc + signature) reads from the cache. A change to a file invalidates that file + and any file that depends on it, transitively. +- Consider `salsa` (the framework rust-analyzer uses) for memoization and + invalidation. It's heavy machinery for a small project, but it solves + exactly this problem and the alternative is hand-rolling the same thing + badly. 
Decide after phase 0 whether to adopt it; for the smallest possible + v1, a hand-rolled cache keyed on file mtimes is fine. +- Parsing should be tolerant of incomplete input. The user is in the middle + of typing; the parser must produce a usable AST with error nodes rather + than bailing on the first syntax error. This shapes the parser choice + (see below). + +### Parser + +The parser is the foundation of every LSP feature. Built with +[`winnow`](https://docs.rs/winnow), the parser library used pervasively +across Oxide. Familiarity, code-review consistency, and shared idioms with +the rest of the codebase outweigh case-by-case evaluation of alternatives. + +Requirements the implementation must meet within winnow: + +- **Error-tolerant.** Recover from syntax errors and continue parsing. A + half-typed proc declaration should still produce a tree where the rest + of the file is analyzable. Winnow's `cut_err` and combinator-level + recovery are the building blocks; design recovery points around + statement boundaries (newline-terminated top-level forms, proc bodies, + `src` statements). +- **Position-preserving.** Every node knows its source span. Winnow's + `Located` adapter or equivalent span-tracking is used throughout. No + AST node without a span. +- **CST-shaped, trivia-preserving.** The output is a concrete syntax tree + that retains whitespace and comments, not a stripped abstract syntax + tree. We need comments for doc-comment extraction and trivia for + accurate formatting (`vw fmt` is a likely future feature). The AST + layer used by name resolution and signature checking is derived from + the CST. +- **Reusable across editor and CLI.** Same parser code runs in `vw run`, + `vw check`, `vw analyzer`, `vw repl`. No editor-only or CLI-only + variants. + +Incremental reparse is deferred. Most htcl files will be small enough +that full reparse per edit is fast; revisit if measurement shows +otherwise. 
If incremental parsing becomes necessary later, the CST +boundary makes it tractable to swap in a different strategy for hot +paths without rewriting downstream analysis. + +Open question: how to structure the CST → AST lowering. Two reasonable +shapes: (a) a single AST with optional trivia attached to nodes, or (b) +a separate AST that holds references back into the CST for source +positions. Pick after the first non-trivial grammar pass. + +### Feature inventory + +Each feature has acceptance criteria specific enough to be implementable. + +#### Completion + +What completion offers depends on cursor context. The completion system +needs a notion of "what kind of position is this," determined by the +surrounding syntax tree. + +Positions and their completion sets: + +- **Top-level statement.** Suggest: keywords (`proc`, `src`, `set`, control + flow), in-scope procs, in-scope variables. +- **After `src `.** Suggest: relative module names (subdirectories and + `.htcl` files reachable from the current file), `/` to start a filesystem + path, `@name/` for declared dependencies. After `@name/`, suggest paths + within that dependency. +- **Command position (start of a statement).** Suggest in-scope procs and + Vivado builtins. For Vivado builtins, the suggestion source is a + generated table from UG835 (see "Vivado builtins" below). +- **Argument position of a known proc call.** If the cursor is after + `axis_interface ` and the next token is `-`, suggest the proc's keyword + arguments. If the cursor is after `-has_tkeep `, suggest values + appropriate to that argument's type / `@enum` set. +- **Inside a `$variable` reference.** Suggest in-scope variable names. +- **Inside an attribute (`@`).** Suggest known attribute names + (`@default`, `@required`, `@enum`, etc.) and, where appropriate, + their arguments. + +Note: parameter completion on IP instantiation sites (the +highest-value HDL use case) is the same code path as proc-argument +completion above. 
An IP wrapper is an htcl proc; its parameters are +proc arguments with attributes; completion works the same way as for +any other proc. There is no special-case "IP property" completion. + +Acceptance criteria: +- Completion responds in <50ms for files under 1000 lines. +- Completion items include `detail` (short type/signature info) and + `documentation` (full doc comment) fields. +- Snippets supported for procs with required arguments — completing a proc + call inserts the proc name plus placeholders for required keyword args. + +#### Hover + +Acceptance criteria: +- Hovering on a proc name shows the proc's doc comment, signature + (arguments with their attributes), and source location. +- Hovering on a proc argument at a call site shows that argument's doc + comment, default value, and any attributes (`@enum`, `@range`, etc.). +- Hovering on a `src` import shows the resolved file path and, if + available, the module's top-level doc comment. +- Hovering on a Vivado builtin shows UG835-derived documentation. +- Hovering on an IP wrapper proc (imported from a vw package) shows + its doc comment and per-parameter docs, same as any other proc. If + the package was generated from IP-XACT, that documentation flows + through unchanged. + +#### Diagnostics + +Diagnostics are produced by the same analyzer the CLI uses. The LSP just +ships them over the wire. + +Categories: + +- **Syntax errors.** Parse failures, recovered to the best position the + parser can manage. +- **Unresolved imports.** `src foo/bar` where `foo/bar.htcl` doesn't exist. +- **Unknown procs.** Call to a name that isn't defined or imported. +- **Argument errors.** Unknown keyword argument, missing required argument, + value outside `@enum` or `@range`, `@requires` / `@conflicts` violation. +- **Unused declarations.** Unused imports, unused local variables. Warning + level, suppressible. +- **Deprecation warnings.** Call sites of procs marked `@deprecated`. 
+ +Note: there is no separate "IP property error" diagnostic category. +Unknown arguments to an IP wrapper proc are caught by the standard +"unknown keyword argument" check; out-of-range values are caught by +the standard `@enum` / `@range` check. The diagnostics machinery +doesn't distinguish IP wrappers from other procs. + +Each diagnostic has: source range, severity, message, optional related +information (e.g., "this is the proc declaration whose required argument +you're missing"), optional code action (e.g., "add missing argument"). + +Acceptance criteria: +- Diagnostics update within 200ms of an edit. +- Every diagnostic has a precise source range, not just a line number. +- Diagnostics are stable: editing an unrelated part of a file doesn't + cause diagnostics elsewhere to flicker. + +#### Go-to-definition + +- **Proc reference → proc declaration.** Across files, following imports. +- **Variable reference → assignment.** Within a scope; "definition" for a + variable is its first assignment in the current scope or a containing + scope. +- **`src` target → the imported file.** Open the imported `.htcl` file. +- **Vivado builtin → UG835 entry.** Either open a generated stub file with + the documentation, or open the UG835 URL. Implementation-defined; the + point is the user can find the docs. + +#### Find references + +- For procs, find all call sites and any explicit references (passing as a + value, etc.). +- For variables, find all reads and writes in scope. +- For modules, find all `src` statements that import them. + +Acceptance criteria: +- Find references on a proc returns results across the whole project, + searching all `.htcl` files transitively reachable from the project root. +- Results include the source range and one line of context. + +#### Rename + +Lowest priority but high value when it works. Rename a proc, variable, or +module and update all references atomically. 
+ +Caveats: +- Renaming across the project boundary (into dependencies) is forbidden. +- Renaming requires the LSP to be confident about every reference; if any + reference is ambiguous (e.g., dynamically constructed), abort with an + error rather than rename incorrectly. + +#### Document symbols and workspace symbols + +- Document symbols: every proc, top-level variable, and module-level + declaration in the current file. Used for the editor's outline view. +- Workspace symbols: same, across the project. Used for "go to symbol in + project" pickers. + +#### Code actions + +A small set in v1, expanded over time: + +- "Add missing required argument." +- "Remove unused import." +- "Convert raw `set_property -dict` to a structured IP configuration call." + (Big one for migration off existing Vivado TCL.) +- "Extract selection to proc." + +#### Formatting + +`vw fmt` is a separate CLI command; the LSP exposes it via the +`textDocument/formatting` request. Formatter implementation is a phase past +v1, but the architectural slot for it should exist from the start (the CST +must preserve enough information to reformat). + +### Configuration completion on IP instances + +The highest-impact LSP feature for HDL work is parameter completion at +IP instantiation sites. The user imports an IP from a vw dependency: + +```htcl +src @xilinx-ip/axis_register_slice +# ... +set lrq [axis_register_slice -has_tkeep 1 -tdata_num_bytes | + ^cursor here +``` + +The analyzer offers completion for the IP's parameters (`tuser_width`, +`tdest_width`, etc.), with hover documentation, defaults, and `@enum` +constraint values pulled from the proc's declared signature. + +Crucially: **this is the same code path as completion on any other +htcl proc.** The IP wrapper is a proc; the proc has structured +arguments with attributes (per the proc grammar); completion of those +arguments works the same as completion of arguments on a hand-written +proc.
There is no special-case "IP property" subsystem in the +analyzer. + +This is the architectural payoff of treating IP as ordinary htcl +packages distributed through vw dependencies: the LSP doesn't need to +know anything about IP-XACT, IP catalogs, or Vivado-specific +introspection. It just analyzes htcl. + +Flow tracking of IP handles is still useful for downstream features — +"this variable is an instance of `axis_register_slice`, so when it's +passed to `connect_axis`, here's what we can validate" — but it's a +narrow extension of proc return-type tracking, not a separate +mechanism for IP. Defer until the cross-IP wiring story matures. + +### Vivado builtins + +htcl needs knowledge of Vivado's built-in TCL commands (`get_cells`, +`report_timing`, `current_design`, etc.) to provide completion and +hover for them. These aren't IP — they're the underlying Vivado +language surface htcl sits on top of. + +Sources: + +- UG835 (the Tcl Command Reference) parsed into a structured form. The + doc has consistent enough structure to be machine-readable, though + it's not trivial. +- `help <command>` output from a live Vivado, scraped at + builtin-data-generation time. +- Hand-written annotations layered on top for things UG835 gets wrong + or doesn't explain. + +The result ships with vw (in a `vw-vivado-data` crate, or similar) as +a generated data file consumed by the analyzer. Regenerated per Vivado +release; the version targeted in `vw.toml` selects which data file is +used. + +### LSP server implementation + +- Crate: `vw-analyzer`, a binary crate. Invoked as `vw analyzer` from + the `vw` CLI dispatcher, or directly via `vw-analyzer`. +- Framework: `tower-lsp` is the standard Rust LSP framework, + well-maintained and used by rust-analyzer-adjacent projects. Use it + unless there's a specific reason not to. +- Transport: stdio. The editor spawns `vw analyzer` and talks to it.
+- Concurrency: file analysis runs on a worker thread pool; the protocol + handler thread stays responsive to cancellation requests. + Long-running analyses (full project re-resolution) are cancellable. +- Language scope: htcl and VHDL in one server process, dispatched + per-file via the `LanguageBackend` abstraction (see "Language + backend abstraction" above). htcl is wired up natively from phase 3; + VHDL is wired up via the `vhdl_ls` proxy in a subsequent phase, with + eventual replacement by the Oxide VHDL frontend. Cross-language + queries are first-class through the backend trait. + +### Editor integration + +VS Code is the primary target. A minimal extension: + +- Activates on `.htcl` and `.vhd`/`.vhdl` files, and on workspaces + containing `vw.toml`. +- Spawns `vw analyzer` as the language server. +- Ships a TextMate grammar for htcl syntax highlighting (VS Code's + native highlighting format). A tree-sitter grammar can come later if + we want to support editors that consume those directly (Zed, Neovim + with `nvim-treesitter`). The editor-side highlighting grammar is + separate from the LSP parser; the LSP uses winnow, while highlighting + is whatever the editor consumes. +- Provides commands: "vw: restart analyzer," "vw: update dependencies," + "vw: show IP property reference." + +Open question: relationship to any existing vw VS Code extension. If one +exists, extend it; if not, this is a new package. Either way, one +extension per project, not separate VHDL and htcl extensions. + +Other editors (Neovim, Emacs, Helix, Zed) get LSP support for free via +their generic LSP clients; we don't ship extensions for them initially, +but configuration snippets in the README are a low-cost way to support +them. + +### LSP testing strategy + +LSP regressions are easy to ship and hard to notice. Test infrastructure +from the start: + +- Snapshot tests for analysis output. 
Each test is a small `.htcl` fixture; + the expected output (diagnostics, symbol tables, completions at marked + positions) is a checked-in snapshot. Mismatches fail the test. +- End-to-end LSP tests using a test client that speaks the protocol. Verify + that a `textDocument/completion` request at a given position returns the + expected set of items. +- Don't test through VS Code. Test the LSP server directly; the VS Code + extension is a thin enough wrapper that manual smoke-testing is fine for + it. + +### Phasing within the LSP work + +The analyzer is built incrementally alongside the rest of the language, +with `vw analyzer` introduced as a real subcommand at phase 3 and +growing features as later phases land: + +- **Phase 3 (analyzer initial, htcl only):** `vw analyzer` binary + exists. `LanguageBackend` abstraction in place with `HtclBackend` as + the sole implementation. Provides diagnostics, document symbols, + go-to-definition for `src` targets and proc references, hover for + proc docs, completion for proc arguments. Crucially, this includes + parameter completion on IP wrapper procs imported from vw packages + — the headline IP-completion case falls out of the proc-argument + completion path. This is the point where the analyzer is genuinely + useful for htcl. +- **Phase 4 (structured wire responses):** No direct analyzer impact, + but enables typed result handling that the REPL builds on. +- **Phase 5 (VHDL via vhdl_ls proxy):** `VhdlBackend` lands as a + proxy to `vhdl_ls`. `vw analyzer` now serves both languages from a + single process; the user-facing multi-language LSP surface is in + place. +- **Phase 6 (cross-language):** htcl ↔ VHDL go-to-definition and find + references, building on the backends from phase 5. +- **Phase 8 (polish):** Find references across the workspace, rename, + workspace symbols, code actions, performance tuning, editor + extension packaging. 
+ +Note: the analyzer benefits from being built alongside language +features rather than after them. Each language feature (modules, proc +grammar, cross-language) lands its analyzer support in the same phase +that introduces the feature. The "phase 8 polish" pass is for +analyzer-only features that don't have an underlying-language +counterpart. + +The eventual replacement of the `vhdl_ls` proxy with direct Oxide VHDL +frontend integration is a "Later" item (see implementation plan). +Because the swap stays within the `LanguageBackend` abstraction, +no htcl-side code needs to change when it happens. + +### Non-goals for the LSP + +- Debugging protocol (DAP). Out of scope; debugging happens inside Vivado. +- Semantic tokens for syntax highlighting. Editor-side grammars are cheaper + and good enough. +- Inlay hints. Possibly later; not needed for v1. +- Refactorings beyond rename and the small code-action set listed above. + +## REPL design + +The REPL is, architecturally, the same product as the analyzer with a +different presentation layer. The LSP serves an editor; the REPL serves +a TUI. Both query the same `vw-htcl` analysis. This isn't a coincidence +to exploit — it's the design. + +The traditional captive-CLI help model (Vivado's `help foo`, Cisco IOS's +`?`) was a 1990s answer to "I don't have a graphics-capable terminal but +I have screen-clearing escape codes." It conflates discovery (what +exists?), reference (what does this do?), and navigation (where am I?) +into a single text-dump idiom that clutters scrollback and answers none +of those questions well. With a modern TUI and the analyzer's data +already on hand, we can do substantially better without reimplementing +anything. + +Built with [`ratatui`](https://ratatui.rs/), the TUI library used +pervasively across Oxide. Line editing uses +[`reedline`](https://docs.rs/reedline) — Nushell's modern readline +replacement, well-suited to hint and menu rendering. 
Both choices are +Oxide-conventional rather than case-evaluated; same reasoning as winnow. + +### Architectural principle: history hygiene + +The defining UX commitment: anything the user explicitly ran (commands +and their results) belongs in scrollback. Anything that was a navigation +aid (completion menus, signature help, hover dialogs, help overlays) +does not. Navigation aids appear in transient overlays or inline +ghost-text and disappear when the user moves on. The scrollback is what +the user chose to do, not how they figured out what to do. + +This is the failure mode of Vivado's REPL: a `help` invocation dumps +200 lines into history, and the user's actual work is buried. Ours +won't. + +### Virtual document model + +The REPL maintains an in-memory document representing the session: +successful evaluations are appended, and the current input line is +treated as the tail. The analyzer's queries operate on (document + +current input), with the cursor positioned within the current input. + +Consequences: + +- Variables and procs defined earlier in the session are in scope for + completion, hover, and diagnostics on the current input. +- Diagnostics on the current line surface *before* the user submits it + — a typo'd argument name is flagged inline, not after Vivado returns + an error. +- Sourced modules contribute their definitions to the session document, + so completion includes everything reachable from the import graph. + +The same analyzer code that powers `vw analyzer`'s editor support +powers the REPL's interactive features. The analyzer doesn't know +whether it's serving an editor or a TUI. + +### Feature inventory + +#### Tab completion + +The primary discovery mechanism. Triggered on Tab, optionally +auto-suggested as a menu after a short typing pause. 
+ +Completion sources match the LSP's: in-scope procs, proc arguments at +call sites (including arguments on IP wrapper procs imported from vw +packages), `@enum` values, variable names, module imports, and Vivado +builtins. + +Rendered as a popup menu *below* the input line. Arrow keys navigate; +Enter or Tab accepts; Escape dismisses. The menu does not enter +scrollback. + +#### Signature help while typing + +When the user is partway through a proc call, a non-intrusive line +*below* the input shows the proc's signature with the current argument +highlighted. The current value's `@enum` or `@range` constraint is +shown alongside. + +The signature line updates as the user types and disappears as soon as +they move past the call. Never enters scrollback. + +#### Modal help overlay + +Bound to F1 (or `?` if a more keyboard-friendly approach is preferred). +Pops a transient overlay — a centered panel or split-pane — with the +full documentation for the symbol under the cursor: proc signature, +all argument docs, attribute constraints, source location, related +procs. For IP wrappers, this surfaces the same documentation that's in +the proc's doc comments and parameter attributes — which (for +IP-XACT-sourced packages) carries the IP-XACT descriptions through to +the user. + +Dismissed with Escape. Nothing lands in scrollback. + +This is what Vivado's `help` command should have been: a brief takeover +of the screen that returns control unchanged. + +#### Inline ghost-text suggestions + +As the user types, faintly render the most likely completion in dim +text ahead of the cursor (fish-shell / Copilot style). Right-arrow or +Tab accepts. Any other key dismisses. + +For HDL workflows where proc and argument names are long and +repetitive (`tdata_num_bytes`, `axis_register_slice`), this saves real +keystrokes. Reedline supports this natively. + +#### Discoverable command palette + +Bound to Ctrl-P (or similar). 
Opens a fuzzy-searchable overlay listing +in-scope procs, recent commands, and (optionally) workspace symbols. +Same data the LSP uses for `workspace/symbol`. + +The failure mode of current Vivado REPLs is "I know I want to do X but +I don't remember what it's called." This is the fix. + +#### Per-instance exploration + +Dedicated mode for the common HDL workflow question: "I have an IP +instance; what are all its current properties and values in the live +Vivado design?" Triggered by `:describe <instance>` or by a hotkey on a +variable in scope. + +Renders a sortable, filterable table built from live `report_property` +data on the instance, joined with the IP wrapper proc's parameter +documentation (so each property has its description and constraints +visible). Navigable with arrow keys; Enter on a property opens its +full documentation. Escape closes. + +Substantially better than `report_property` dumped into scrollback as +text. + +#### Pretty-printed structured results + +When `eval_structured` (phase 4) returns a typed result, the REPL +renders it as a navigable structure rather than a flat string. Timing +reports become collapsible trees; property dumps become tables; lists +of cells/pins/nets become selectable lists where each entry can be +hovered for details. + +The text representation is still available — `:plain` or a config +option turns off pretty-printing for screencasts and pipe-friendly +output. + +#### Lightweight text help fallback + +A `:help foo` command (or similar) prints help to scrollback as plain +text. Useful for SSH over slow links, grepping history, screencasts, +and copying into chat/issues. The TUI overlay is the default for +interactive use; the text command exists for cases where the overlay +isn't what's wanted. + +This is the one place we accept scrollback clutter, because it's the +user explicitly asking for it. + +### Implementation notes + +**Debouncing.** Analysis runs on keystrokes; if it takes more than +~20ms the UI feels laggy.
Debounce completion and hover queries (fire +~50ms after input stability), and run analysis on a worker thread that +the ratatui frame loop polls. + +**Cancellation.** A new keystroke invalidates the previous analysis +request. The analyzer is already cancellable (LSP requirement); the +REPL inherits that. + +**History.** Persistent across sessions, stored in +`~/.local/state/vw/repl-history` (or platform-equivalent). Reedline +handles this. + +**Multiline input.** htcl procs span multiple lines; the editor must +support multi-line buffers with proper indentation. Reedline supports +this; pair with the parser to detect when a buffer is syntactically +complete vs. needs more lines. + +**Vivado worker lifecycle.** A REPL session corresponds to one +long-lived Vivado worker. Cold start happens at REPL launch (with a +spinner during the multi-second Vivado startup); the worker persists +until exit. `:restart` rebuilds the worker without exiting the REPL. + +**Module hot reload.** If a sourced module changes on disk, the REPL +detects it (file watcher), re-sources, and updates the session +document. The user keeps any session-local definitions made after the +module was first loaded. Conflicts (same name now means something +different) are surfaced as warnings. + +### Phasing within the REPL work + +The REPL doesn't need to wait for every other phase to land. It can +ship with a meaningful subset early and grow: + +- **Initial REPL (phase 7 below):** ratatui shell, reedline line + editor, tab completion, signature help, history, multi-line input, + Vivado worker integration, pretty-printed results (phase 4 has + already landed by this point). Parameter completion on IP wrapper + procs works the same as parameter completion on any other proc — + no separate path. +- **`:describe` for live instances:** lands when wired up; depends on + the structured-wire-response work from phase 4 to read live + properties cleanly. 
+- **Polish phase (alongside LSP phase 8):** Command palette, modal + help overlay, ghost-text suggestions, file-watcher-based module hot + reload. + +### Non-goals for the REPL + +- Mouse interaction. Keyboard-only TUI. Mouse support is an + accessibility win we can add later; not v1. +- Replacing the Vivado GUI. The REPL is for scripted/exploratory + workflows; users who need waveform viewers and floorplanning still + use Vivado proper. +- Persistent named sessions / tmux-style detach. Run inside tmux if + you want that. +- Custom keybinding configuration in v1. Pick sane defaults; expose + config later if requested. + +## Implementation plan + +The plan is organized around extending `vw` with new crates and +subcommands. The existing `vw-lib` and `vw` CLI are not restructured; we +add alongside. + +New crates introduced over the phases: + +- `vw-htcl` — htcl parser, AST, name resolution, signature checking, + TCL emission. The language layer. +- `vw-vivado` — Vivado worker spawn/connect, wire protocol, embedded + shim TCL. The execution layer. +- `vw-vivado-data` — generated database of UG835 builtin commands. + Regenerated per Vivado release; not user-edited. +- `vw-analyzer` — LSP server. Binary. +- `vw-repl` — interactive shell. Binary. + +Not in this project (separate downstream tooling): + +- IP-XACT → htcl wrapper generation. A sideband tool that reads an + IP's IP-XACT `component.xml` and emits an `.htcl` configuration + interface (a wrapper proc whose parameters match the IP's + parameters). Lives in its own repo, ships its own binary, produces + vw-consumable packages. The output is ordinary htcl that vw doesn't + need to know was generated. See "Strategic context" section. +- IP-XACT + configuration values → RSF. A separate tool that produces + the register-spec file for a specific IP instantiation. Reads the + IP-XACT memory map and the configuration values, emits RSF. Not in + vw; see "RSF generation" in Strategic Context. 
+ +The `vw` CLI grows subcommands `run`, `check`, `repl`, `analyzer` (plus +existing `add`, `update`, `test`, etc.). + +### Phase 0: skeleton + +Goal: smallest end-to-end thing that proves the architecture. + +- New crates `vw-htcl` and `vw-vivado` created in the vw repo. +- `vw run` subcommand added to the CLI dispatcher. +- htcl parser for a minimal subset: literals, variables, `set`, `proc`, + command invocation, comments. No control flow yet. Built with + [`winnow`](https://docs.rs/winnow); see the LSP design section's + "Parser" subsection for the full rationale and requirements. +- Vivado worker spawn-and-connect logic. +- Vivado shim with `eval` op only. +- `vw run file.htcl` reads the file, sends each top-level command to + Vivado, prints results. + +Deliverable: `vw run hello.htcl` where `hello.htcl` is `puts "hello"` +prints `hello`. + +### Phase 1: module system + +Goal: `src` works. + +- Use `vw-lib` to find the project root (location of `vw.toml`). +- Implement `src` with relative and filesystem-absolute resolution. +- `@name/...` resolution: query `vw-lib` for the dependency's resolved + cache path, then index into it via the `htcl` selector. +- Module loading: parse file, execute top-level forms, track loaded set + for idempotence. +- Decide and implement namespace semantics (see open question above). +- Coordinate `vw-lib` extensions: the `htcl` selector key in dependency + entries; path dependencies if not already supported. + +Deliverable: a multi-file project loads and runs, including imports from +a `vw`-managed dependency. + +### Phase 2: proc grammar + +Goal: structured proc declarations with attributes. + +- Extend parser for the proc-arg grammar (doc comments, attributes, + names). +- AST representation for procs with metadata. +- At call time, validate keyword args against the declared signature: + required args present, no unknown args, `@enum` / `@range` / + `@requires` / `@conflicts` checked. 
+- Generate a TCL-side `proc` that takes positional args in canonical + order; callers pass keyword args, vw-htcl reorders them and emits a + positional call. +- Introduce `vw check` subcommand that runs analysis and reports + diagnostics without executing. + +Deliverable: the `axis_interface` example from the language design +section works end-to-end with validation, and `vw check` flags malformed +calls. + +### Phase 3: analyzer (LSP) — initial version + +Goal: editor support lands as soon as the analysis is meaningful. + +- `vw-analyzer` crate, `vw analyzer` subcommand. +- LSP server using `tower-lsp` over stdio. +- `LanguageBackend` trait introduced; `HtclBackend` is the only + implementation initially. The dispatch by file extension is in + place from the start (it just always routes to `HtclBackend`). +- Wire up the existing `vw-htcl` analysis through `HtclBackend`: + diagnostics, document symbols, hover for proc docs, go-to-definition + for `src` targets and proc references. +- Completion for proc arguments (using the signature data from phase + 2). +- VS Code extension stub: activates on `.htcl` and `vw.toml`, launches + `vw analyzer`. + +Deliverable: opening an htcl project in VS Code gives diagnostics, +hover, and basic completion. The LSP is genuinely useful for htcl +from this point forward; later phases add features and bring VHDL +into the same server. + +### Phase 4: structured wire responses + +Goal: avoid Rust-side TCL parsing for structured outputs. + +- Add `eval_structured` op to wire protocol. +- Write Vivado-shim wrappers for the initial command set + (`report_property`, `get_cells`-family, etc.). +- Rust-side types for the parsed results. + +Deliverable: `report_property` returns a typed Rust value, not a string, +in the executor. + +### Phase 5: VHDL via vhdl_ls proxy + +Goal: bring VHDL into `vw analyzer` so it serves both languages from a +single process; the user-facing surface for a unified LSP is in place. 
+ +- `VhdlBackend` implementation that spawns `vhdl_ls` as a subprocess + and proxies LSP requests for `.vhd` / `.vhdl` files. +- Generate `vhdl_ls.toml` from `vw.toml` (reuse the existing `vw` + logic for this) and point the subprocess at it. Regenerate when + `vw.toml` changes. +- File-type dispatch in `vw-analyzer` now routes htcl files to + `HtclBackend` and VHDL files to `VhdlBackend`. +- Cross-language query surface (`find_symbol` etc.) implemented on + `VhdlBackend` via `workspace/symbol` and related `vhdl_ls` + queries. +- Cancellation, lifecycle, and error handling for the subprocess. +- Performance: confirm the proxy adds acceptable overhead. If + noticeable, profile and optimize. + +Deliverable: a single `vw analyzer` process serves htcl and VHDL. +Editors configured to use it see consistent behavior across both +languages without needing a separate `vhdl_ls` configuration. +Cross-language queries from htcl to VHDL work but aren't yet +user-facing (next phase wires up the htcl-side attributes). + +### Phase 6: cross-language analysis + +Goal: htcl ↔ VHDL navigation (the user-facing cross-language +features, building on phase 5's backend wiring). + +- `@vhdl_entity(name)` attribute on htcl procs declaring the entity + they wrap. +- `HtclBackend` resolves entity references by issuing `find_symbol` + to `VhdlBackend`. Go-to-definition surfaces the resulting location. +- Find-references on a VHDL entity surfaces both VHDL instantiations + (from `VhdlBackend`) and htcl wrappers (from `HtclBackend`'s index + of `@vhdl_entity` attributes). +- Generic-to-argument mapping: optional in this phase, depending on + how much work the `find_symbol` extension to "give me this entity's + generics" turns out to be. + +Deliverable: clicking through an htcl proc into its VHDL entity +works, in both directions. + +### Phase 7: REPL + +Goal: ship the REPL as a meaningful interactive environment. See the +dedicated "REPL design" section above for the full treatment. 
+ +- `vw-repl` crate, `vw repl` subcommand. +- Built with `ratatui` and `reedline`. +- Initial feature set (per the REPL phasing subsection): tab completion, + signature help, persistent history, multi-line input, Vivado worker + lifecycle management, pretty-printed structured results (relies on + phase 4). + +Deliverable: a meaningfully better experience than the Vivado console +for exploring a live design — discoverable commands, inline validation, +overlay-based help that doesn't clutter scrollback. + +### Phase 8: LSP polish + +Goal: bring the analyzer up to "rust-analyzer-quality" expectations for +the features that matter most. + +- Find references across the workspace. +- Rename (cautious; abort on ambiguity). +- Workspace symbols. +- Code actions (add missing required argument, remove unused import, + convert raw `set_property -dict` to a structured call). +- Performance tuning; consider `salsa` if hand-rolled caching shows its + limits. + +Deliverable: an analyzer that meets the acceptance criteria in the LSP +design section. + +### Later (not in initial plan) + +- Type system (typed IP handles, units, phases, constraint scopes). +- Quartus backend. +- **Oxide VHDL frontend integration.** Replace the `vhdl_ls` proxy + `VhdlBackend` with a direct integration with Oxide's developing + VHDL frontend. Same `LanguageBackend` trait, different + implementation. Timing depends on the frontend's maturity; the + `LanguageBackend` abstraction exists from phase 3 specifically to + make this swap possible without rippling into htcl-side code. +- Tracing / profiling of TCL execution. +- Distributed worker pools for parallel synthesis runs. +- `vw fmt` (htcl formatter). +- Mechanism for htcl parameter doc comments to propagate into + Vivado-generated wrappers (requires Xilinx-side support; see + "Wrapper documentation" in Strategic Context). + +Not in this project at all (separate downstream tooling): + +- IP-XACT → htcl wrapper generation (a sideband tool). 
+- IP-XACT + configuration values → RSF generation (a separate tool; + see "RSF generation" in Strategic Context). + +## Open questions to resolve with the author + +1. **Final name for the htcl dialect.** Working name; pick something + durable before shipping anything publicly. This is just the language + name now, not a tool name. +2. **Module namespace semantics.** Global (TCL-compatible, simple) or + scoped with explicit exports (better complexity management, bigger + change)? Recommend scoped, but flag for discussion. +3. **`src` vs `use` vs `import` vs `mod` keyword.** Recommend `use` for + familiarity (Rust) and to avoid the `src/` directory collision. +4. **Shim distribution.** Ship the shim TCL embedded in the `vw-vivado` + binary, written to a temp file at worker startup? Or expect it on + disk somewhere? Embedded is simpler for users; do that unless there's + a reason not to. +5. **`vw-lib` extensions to confirm or schedule:** + - Stable Rust API for "give me the resolved cache path for dependency + X" — confirm shape. + - Generalization of the dependency selector to per-language keys + (`src` for VHDL, `htcl` for htcl), or staying with `src` plus a + parallel `htcl` field. + - Path dependencies (`path = "..."`) — present, absent, or in + progress? +6. **Cross-language wrapper attribute name.** `@vhdl_entity(name)` is + the working syntax for declaring which VHDL entity an htcl proc + wraps. Confirm or rename. +7. **Showcase IP selection.** For the Vivado-team pitch, which IPs do + we cover in the initial generated `xilinx-ip` package? The + IP-XACT → htcl generator (a separate sideband tool) produces + wrappers mechanically, but the showcase needs to demonstrate + quality at a level that earns the conversation. Pick a small set + where the generated wrappers will look genuinely good, plus one or + two complex IPs (DCMAC, CIPS) where the value of source-controlled + configuration is most visible. +8. 
**`vhdl_ls` proxy specifics.** Phase 5 wires up VHDL via a + subprocess proxy to `vhdl_ls`. Open: does `vhdl_ls`'s + `workspace/symbol` interface answer the cross-language queries we + need (entity location, generic lists)? If not, what's the + smallest extension to either the proxy or to `vhdl_ls` itself that + closes the gap? Confirm before phase 5 starts. +9. **Evidence-gathering for eventual types.** Not a v1 question, but + worth a habit from day one: when working with htcl, keep a log of + cases where a type system would have caught a real bug or documented + intent meaningfully. Revisit the types decision only when there's a + concrete case file to design against. + +## Non-goals + +- TCL language compatibility. We are not implementing TCL; we are implementing + a different language that happens to share TCL's value model and emits TCL + to Vivado. +- General TCL extension authorship. The Vivado shim is the only TCL we write + intentionally; it stays small. +- Replacing Vivado's interpreter in-process. We talk to it over a pipe. +- Supporting every Vivado command natively. Most commands pass through as + strings; we add structured wrappers only where they pay off. +- A package registry. Git-source dependencies cover the realistic needs; a + registry is a separate company. +- **IP-XACT awareness in vw, the analyzer, or the REPL.** IP-XACT is a + source format for *generating* htcl IP wrapper packages via a + separate sideband tool. The tooling described in this plan consumes + only htcl; it has no IP-XACT-specific code paths, data structures, + or features. See "Strategic context" for the rationale. +- **A replacement for IP-XACT.** htcl is a configuration interface + layer that sits above IP-XACT (specification) and below generated + RTL/RSF (instantiation). It does not describe ports, memory maps, + or any other aspect of an IP's structure — those remain IP-XACT's + responsibility. See "Conceptual layering" in Strategic Context. 
+- **Port-level analysis of generated RTL.** htcl wrappers don't + describe the ports their instantiation will emit; ports come from + the VHDL/Verilog Vivado generates. Cross-IP wiring analysis is + possible but lives in the VHDL analyzer, not in htcl. +- **Memory-map description.** Not htcl's job. Register interfaces are + generated from IP-XACT plus configuration values by a separate + pipeline targeting RSF; see "RSF generation" in Strategic Context. + +## Reference points + +- `vw`: https://github.com/oxidecomputer/vw — the host project for this + work. `vw-lib` is the existing library that handles dependency + resolution, caching, and manifest/lockfile management. The plan + extends `vw` with htcl-language support and an analyzer/repl modeled + on rust-analyzer's relationship to Cargo. +- rust-analyzer + Cargo: the architectural model. rust-analyzer reads + `Cargo.toml` / `Cargo.lock`, resolves dependencies through Cargo's + data model, and provides editor support without reimplementing the + build tool. `vw analyzer` plays the same role for `vw.toml` / + `vw.lock`. +- UG835: Vivado Design Suite Tcl Command Reference — the authoritative + source for what commands exist and what they return. +- IP-XACT (IEEE 1685): the schema for IP component metadata. *Not used + internally by vw, the analyzer, or the REPL.* IP-XACT is the source + format consumed by a separate sideband tool that generates `.htcl` + IP packages, which vw then resolves like any other dependency. +- TypeScript: model for "additive features over an existing language" + done well. Discipline: existing code keeps working (except htcl + breaks this intentionally for the proc-arg case), new features are + opt-in, output is consumable by tools that don't know about the new + features. 
diff --git a/vw-analyzer/Cargo.toml b/vw-analyzer/Cargo.toml new file mode 100644 index 0000000..d805726 --- /dev/null +++ b/vw-analyzer/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "vw-analyzer" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "Multi-language LSP server for the vw HDL workflow (htcl native; VHDL via vhdl_ls proxy in a later phase)" + +[[bin]] +name = "vw-analyzer" +path = "src/main.rs" + +[dependencies] +vw-htcl = { path = "../vw-htcl" } +vw-lib = { path = "../vw-lib" } +camino.workspace = true +serde.workspace = true +serde_json.workspace = true +tokio.workspace = true +async-trait.workspace = true +tower-lsp.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/vw-analyzer/src/backend.rs b/vw-analyzer/src/backend.rs new file mode 100644 index 0000000..770ea88 --- /dev/null +++ b/vw-analyzer/src/backend.rs @@ -0,0 +1,91 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! [`LanguageBackend`] — per-language analysis surface consumed by the +//! LSP server. +//! +//! Even though only [`HtclBackend`](crate::HtclBackend) exists today, +//! defining the trait from day one is the architectural commitment +//! described in the project plan: VHDL via a `vhdl_ls` proxy (phase 5) +//! and a future direct Oxide-VHDL-frontend integration both slot in as +//! additional implementations without changing the server or +//! cross-language htcl code. + +use async_trait::async_trait; +use tower_lsp::lsp_types::{ + CompletionItem, Diagnostic, DocumentSymbol, Hover, Location, Position, + SignatureHelp, Url, +}; + +#[async_trait] +pub trait LanguageBackend: Send + Sync { + /// Language id (`"htcl"`, `"vhdl"`, ...) 
— used for tracing and + /// dispatch. + fn language_id(&self) -> &str; + + /// Whether this backend handles the given file. Default: match by + /// extension. + fn handles(&self, uri: &Url) -> bool; + + /// Update the backend's view of `uri`'s contents. Called on + /// `did_open` and every `did_change`. The backend should treat + /// this as the new authoritative source and may eagerly compute + /// (and cache) analysis results. + async fn set_text(&self, uri: Url, text: String); + + /// Forget any state for `uri`. + async fn close(&self, uri: &Url); + + /// Diagnostics for the current text of `uri`. The server pushes + /// these to the editor via `textDocument/publishDiagnostics` after + /// every text update. + async fn diagnostics(&self, uri: &Url) -> Vec; + + /// Document symbols ("outline view") for `uri`. + async fn document_symbols(&self, uri: &Url) -> Vec; + + /// Hover content for the construct at `position`. Returns `None` + /// if the cursor isn't on anything the backend has something to + /// say about. + async fn hover(&self, uri: &Url, position: Position) -> Option; + + /// Definition site for the reference at `position`. Returns + /// `None` if the cursor isn't on a known reference. Returns + /// possibly multiple locations because, in general, a name may + /// have several defining sites (overloads, conditional + /// definitions); the Phase 2 htcl backend only returns one. + async fn goto_definition( + &self, + uri: &Url, + position: Position, + ) -> Vec; + + /// Completion items for the cursor at `position`. Empty when the + /// backend has nothing to offer in that context. + async fn completion( + &self, + uri: &Url, + position: Position, + ) -> Vec; + + /// Signature help for the call enclosing `position`. `None` when + /// the cursor isn't inside a call the backend recognizes. + async fn signature_help( + &self, + uri: &Url, + position: Position, + ) -> Option; +} + +/// A symbol surfaced from a backend, language-neutral. 
Backends that +/// need richer fields can build [`DocumentSymbol`] directly; this is +/// a convenience for the common cases that fit a flat name+kind+span. +#[derive(Clone, Debug)] +pub struct SymbolInfo { + pub name: String, + pub kind: tower_lsp::lsp_types::SymbolKind, + pub detail: Option, + pub range: tower_lsp::lsp_types::Range, + pub selection_range: tower_lsp::lsp_types::Range, +} diff --git a/vw-analyzer/src/htcl_backend.rs b/vw-analyzer/src/htcl_backend.rs new file mode 100644 index 0000000..486f731 --- /dev/null +++ b/vw-analyzer/src/htcl_backend.rs @@ -0,0 +1,1307 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! htcl [`LanguageBackend`] — native, in-process, using `vw-htcl`. + +use std::collections::HashMap; +use std::fmt::Write; +use std::sync::Arc; + +use async_trait::async_trait; +use tokio::sync::RwLock; +use tower_lsp::lsp_types::{ + CompletionItem, CompletionItemKind, Diagnostic, DiagnosticSeverity, + DocumentSymbol, Documentation, Hover, HoverContents, InsertTextFormat, + Location, MarkupContent, MarkupKind, ParameterInformation, ParameterLabel, + Position, Range, SignatureHelp, SignatureInformation, SymbolKind, TextEdit, + Url, +}; +use vw_htcl::{ + complete_at, definition_at, hover_at, parse, signature_help_at, validate, + Attribute, AttributeValue, CommandKind, Completion, CompletionKind, + HoverTarget, LineCol, LineIndex, ProcArg, ProcSignature, Severity, Stmt, +}; + +use crate::backend::LanguageBackend; + +#[derive(Default)] +pub struct HtclBackend { + docs: Arc>>, +} + +struct DocState { + text: String, +} + +impl HtclBackend { + pub fn new() -> Self { + Self::default() + } +} + +#[async_trait] +impl LanguageBackend for HtclBackend { + fn language_id(&self) -> &str { + "htcl" + } + + fn handles(&self, uri: &Url) -> bool { + uri.path().ends_with(".htcl") + } + + async fn 
set_text(&self, uri: Url, text: String) { + self.docs.write().await.insert(uri, DocState { text }); + } + + async fn close(&self, uri: &Url) { + self.docs.write().await.remove(uri); + } + + async fn diagnostics(&self, uri: &Url) -> Vec { + let docs = self.docs.read().await; + let Some(doc) = docs.get(uri) else { + return Vec::new(); + }; + // Parse errors are file-local: report from the open document's + // own parse. (Imports' parse errors are diagnosed when their + // file is the open one.) + let parsed_local = parse(&doc.text); + let line_index = LineIndex::new(&doc.text); + let mut out = Vec::new(); + for err in &parsed_local.errors { + let (start, end) = line_index.range(err.span); + out.push(Diagnostic { + range: Range { + start: lc_to_pos(start), + end: lc_to_pos(end), + }, + severity: Some(DiagnosticSeverity::ERROR), + source: Some("vw-htcl".into()), + message: err.message.clone(), + ..Default::default() + }); + } + + // For validator diagnostics: validate the workspace view so + // imported proc signatures are in scope, then keep only the + // diagnostics that land in this file. That way calling an + // imported proc no longer reads as "unknown proc" but a typo + // *in* this file still does. 
+ let view = crate::workspace::build_view(uri, &doc.text); + let parsed_view = parse(&view.view_source); + for d in validate(&parsed_view.document, &view.view_source) { + if d.span.start >= view.local_len { + continue; + } + let (start, end) = line_index.range(d.span); + let severity = match d.severity { + Severity::Error => DiagnosticSeverity::ERROR, + Severity::Warning => DiagnosticSeverity::WARNING, + }; + out.push(Diagnostic { + range: Range { + start: lc_to_pos(start), + end: lc_to_pos(end), + }, + severity: Some(severity), + source: Some("vw-htcl".into()), + message: d.message, + ..Default::default() + }); + } + out + } + + async fn document_symbols(&self, uri: &Url) -> Vec { + let docs = self.docs.read().await; + let Some(doc) = docs.get(uri) else { + return Vec::new(); + }; + let parsed = parse(&doc.text); + let line_index = LineIndex::new(&doc.text); + let mut symbols = Vec::new(); + for stmt in &parsed.document.stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let CommandKind::Proc(proc) = &cmd.kind else { + continue; + }; + let name = proc.name.clone().unwrap_or_else(|| "".into()); + let (cmd_start, cmd_end) = line_index.range(cmd.span); + let (name_start, name_end) = line_index.range(proc.name_span); + let detail = if cmd.doc_comments.is_empty() { + None + } else { + Some(cmd.doc_comments.join("\n")) + }; + #[allow(deprecated)] + symbols.push(DocumentSymbol { + name, + detail, + kind: SymbolKind::FUNCTION, + tags: None, + deprecated: None, + range: Range { + start: lc_to_pos(cmd_start), + end: lc_to_pos(cmd_end), + }, + selection_range: Range { + start: lc_to_pos(name_start), + end: lc_to_pos(name_end), + }, + children: None, + }); + } + symbols + } + + async fn goto_definition( + &self, + uri: &Url, + position: Position, + ) -> Vec { + let docs = self.docs.read().await; + let Some(doc) = docs.get(uri) else { + return Vec::new(); + }; + let line_index = LineIndex::new(&doc.text); + let offset = line_index.offset_of(LineCol { + line: 
position.line, + character: position.character, + }); + + // Special case: cursor on a `src @dep/foo` path → jump to the + // imported file. Resolved through the same `vw-lib` machinery + // the CLI uses, so editor and CLI agree on the same target. + let parsed_local = parse(&doc.text); + if let Some(import) = src_import_at(&parsed_local.document, offset) { + if let Some(raw) = import.path.as_deref() { + let Ok(file_path) = uri.to_file_path() else { + return Vec::new(); + }; + if let Some(resolved) = + crate::workspace::resolve_import(&file_path, raw) + { + if let Ok(target_uri) = Url::from_file_path(resolved) { + return vec![Location { + uri: target_uri, + range: Range::default(), + }]; + } + } + } + return Vec::new(); + } + + // General case: resolve against the workspace view so calls to + // imported procs jump to the right file. + let view = crate::workspace::build_view(uri, &doc.text); + let parsed_view = parse(&view.view_source); + let Some(target_span) = + definition_at(&parsed_view.document, &view.view_source, offset) + else { + return Vec::new(); + }; + + // Translate the target span back to its source file: local + // file when in the original region, otherwise the imported + // file whose appended region contains it. + match view.locate(target_span.start) { + None => { + let (start, end) = line_index.range(target_span); + vec![Location { + uri: uri.clone(), + range: Range { + start: lc_to_pos(start), + end: lc_to_pos(end), + }, + }] + } + Some((region, _)) => { + // Read the imported file's text so we can build a + // file-local line index. (Already on disk; cheap.) 
+ let Ok(import_path) = region.file_uri.to_file_path() else { + return Vec::new(); + }; + let Ok(import_text) = std::fs::read_to_string(&import_path) + else { + return Vec::new(); + }; + let import_index = LineIndex::new(&import_text); + let local_start = target_span.start - region.start; + let local_end = target_span.end - region.start; + let (s, e) = import_index + .range(vw_htcl::Span::new(local_start, local_end)); + vec![Location { + uri: region.file_uri.clone(), + range: Range { + start: lc_to_pos(s), + end: lc_to_pos(e), + }, + }] + } + } + } + + async fn hover(&self, uri: &Url, position: Position) -> Option { + let docs = self.docs.read().await; + let doc = docs.get(uri)?; + let line_index = LineIndex::new(&doc.text); + let offset = line_index.offset_of(LineCol { + line: position.line, + character: position.character, + }); + // Use the workspace view so a hover on a call to an imported + // proc shows that proc's signature, not nothing. + let view = crate::workspace::build_view(uri, &doc.text); + let parsed = parse(&view.view_source); + let target = hover_at(&parsed.document, &view.view_source, offset)?; + // The hover span is in view-source coordinates; only translate + // back to line/col when it lands in the local file (which is + // always true for a cursor hover from this editor). + if target.span().start >= view.local_len { + return None; + } + let (start, end) = line_index.range(target.span()); + // The proc's own doc comments live on the surrounding Command, + // not on its `Proc` payload — fetch them up here so the + // formatters can stay focused on shape, not lookup plumbing. + let proc_doc_comments = match &target { + HoverTarget::ProcDef { proc, .. } => { + proc_doc_comments_for(&parsed.document, proc) + } + HoverTarget::CallSite { proc_name, .. 
} => { + proc_doc_comments_by_name(&parsed.document, proc_name) + } + _ => Vec::new(), + }; + let markdown = format_hover(&target, &proc_doc_comments); + Some(Hover { + contents: HoverContents::Markup(MarkupContent { + kind: MarkupKind::Markdown, + value: markdown, + }), + range: Some(Range { + start: lc_to_pos(start), + end: lc_to_pos(end), + }), + }) + } + + async fn completion( + &self, + uri: &Url, + position: Position, + ) -> Vec { + let docs = self.docs.read().await; + let Some(doc) = docs.get(uri) else { + return Vec::new(); + }; + let line_index = LineIndex::new(&doc.text); + let offset = line_index.offset_of(LineCol { + line: position.line, + character: position.character, + }); + + // `src ` is filesystem-aware, so it takes its own + // path before we fall back to the htcl-level analyzer. + let line = vw_htcl::cmdline::analyze(&doc.text, offset); + if crate::src_complete::is_src_path_context(&line) { + if let Ok(entry_file) = uri.to_file_path() { + let resolver = crate::workspace::build_resolver(&entry_file); + return crate::src_complete::src_path_completions( + &entry_file, + &line, + &line_index, + &resolver, + ); + } + } + + // Workspace view here too: command-position completion picks + // up imported proc names. + let view = crate::workspace::build_view(uri, &doc.text); + let parsed = parse(&view.view_source); + complete_at(&parsed.document, &view.view_source, offset) + .into_iter() + // The completion result's `replace` span is in view + // coordinates; if it slipped past the local region we + // drop it (shouldn't happen for in-file cursors, but + // defensive). 
+ .filter(|c| c.replace.start < view.local_len) + .map(|c| completion_item(c, &line_index)) + .collect() + } + + async fn signature_help( + &self, + uri: &Url, + position: Position, + ) -> Option { + let docs = self.docs.read().await; + let doc = docs.get(uri)?; + let line_index = LineIndex::new(&doc.text); + let offset = line_index.offset_of(LineCol { + line: position.line, + character: position.character, + }); + // Workspace view so signatures of imported procs surface, and + // so the cmdline scan can step into a `[ … ]` substitution + // (the parser now carries a `body` inside `CmdSubst` and the + // scan already treats `[` as a command boundary). + let view = crate::workspace::build_view(uri, &doc.text); + let parsed = parse(&view.view_source); + let help = + signature_help_at(&parsed.document, &view.view_source, offset)?; + Some(signature_help_response(&help)) + } +} + +// --- completion / signature-help formatters ------------------------------- + +fn completion_item(c: Completion, line_index: &LineIndex) -> CompletionItem { + let kind = match c.kind { + CompletionKind::Proc => CompletionItemKind::FUNCTION, + CompletionKind::Flag => CompletionItemKind::FIELD, + CompletionKind::EnumValue => CompletionItemKind::ENUM_MEMBER, + }; + let (start, end) = line_index.range(c.replace); + let text_edit = TextEdit { + range: Range { + start: lc_to_pos(start), + end: lc_to_pos(end), + }, + new_text: c.label.clone(), + }; + CompletionItem { + label: c.label, + kind: Some(kind), + detail: c.detail, + documentation: c.documentation.map(|value| { + Documentation::MarkupContent(MarkupContent { + kind: MarkupKind::Markdown, + value, + }) + }), + insert_text_format: Some(InsertTextFormat::PLAIN_TEXT), + text_edit: Some(tower_lsp::lsp_types::CompletionTextEdit::Edit( + text_edit, + )), + ..Default::default() + } +} + +fn signature_help_response(help: &vw_htcl::SignatureHelp<'_>) -> SignatureHelp { + // Build the rendered signature label and, in lockstep, the + // [start, end) 
offsets each parameter occupies within it so the + // editor highlights the active one. Names are identifiers, so + // UTF-16 and char counts coincide. + let mut label = help.proc_name.clone(); + let mut parameters = Vec::with_capacity(help.signature.args.len()); + for arg in &help.signature.args { + label.push(' '); + let start = label.chars().count() as u32; + label.push('-'); + label.push_str(&arg.name); + if let Some(ty) = arg.type_annotation.as_ref() { + label.push_str(": "); + label.push_str(&render_type(ty)); + } + let end = label.chars().count() as u32; + parameters.push(ParameterInformation { + label: ParameterLabel::LabelOffsets([start, end]), + documentation: vw_htcl::doc::brief(&arg.doc_comments) + .map(Documentation::String), + }); + } + // Append the return type to the signature label when present. + // Renders as `proc-name -arg1 -arg2 → bd_cell`. + if let Some(ty) = help.signature.return_type.as_ref() { + label.push_str(" → "); + label.push_str(&render_type(ty)); + } + + let reflowed = vw_htcl::doc::reflow_doc_comments(help.doc_comments); + let documentation = (!reflowed.is_empty()).then_some({ + Documentation::MarkupContent(MarkupContent { + kind: MarkupKind::Markdown, + value: reflowed, + }) + }); + + #[allow(deprecated)] // `active_parameter` field on SignatureInformation + let info = SignatureInformation { + label, + documentation, + parameters: Some(parameters), + active_parameter: help.active_parameter, + }; + + SignatureHelp { + signatures: vec![info], + active_signature: Some(0), + active_parameter: help.active_parameter, + } +} + +// --- src import lookup ---------------------------------------------------- + +/// If the cursor at `offset` is on the path word of a `src ` +/// statement, return that import. Used by `goto_definition` to jump +/// to the imported module. 
+fn src_import_at( + document: &vw_htcl::Document, + offset: u32, +) -> Option<&vw_htcl::SrcImport> { + for stmt in &document.stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let CommandKind::Src(import) = &cmd.kind else { + continue; + }; + if import.path_span.contains(offset) { + return Some(import); + } + } + None +} + +// --- doc-comment lookup --------------------------------------------------- + +fn proc_doc_comments_for( + document: &vw_htcl::Document, + proc: &vw_htcl::Proc, +) -> Vec { + proc_doc_comments_for_in(&document.stmts, proc).unwrap_or_default() +} + +fn proc_doc_comments_for_in( + stmts: &[Stmt], + proc: &vw_htcl::Proc, +) -> Option> { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::Proc(p) + // Pointer-identity match: `proc` was looked up out + // of this same parse, so its address inside the AST + // is unique. + if std::ptr::eq(p, proc) => { + return Some(cmd.doc_comments.clone()); + } + CommandKind::NamespaceEval(ns) => { + if let Some(found) = proc_doc_comments_for_in(&ns.body, proc) { + return Some(found); + } + } + _ => {} + } + } + None +} + +fn proc_doc_comments_by_name( + document: &vw_htcl::Document, + name: &str, +) -> Vec { + proc_doc_comments_by_name_in(&document.stmts, "", name).unwrap_or_default() +} + +fn proc_doc_comments_by_name_in( + stmts: &[Stmt], + prefix: &str, + name: &str, +) -> Option> { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::Proc(p) => { + let Some(decl_name) = p.name.as_deref() else { + continue; + }; + let qualified = if prefix.is_empty() { + decl_name.to_string() + } else { + format!("{prefix}::{decl_name}") + }; + if qualified == name { + return Some(cmd.doc_comments.clone()); + } + } + CommandKind::NamespaceEval(ns) => { + let Some(ns_name) = ns.name.as_deref() else { + continue; + }; + let nested = if prefix.is_empty() { + ns_name.to_string() + } else { + 
format!("{prefix}::{ns_name}") + }; + if let Some(found) = + proc_doc_comments_by_name_in(&ns.body, &nested, name) + { + return Some(found); + } + } + _ => {} + } + } + None +} + +/// Render a type expression in the canonical user-facing form — +/// `dict`, `list`, etc. Used by hover and +/// signature-help so the displayed type matches what the user +/// would write in source. +fn render_type(ty: &vw_htcl::TypeExpr) -> String { + match ty { + vw_htcl::TypeExpr::Named { name, .. } => name.clone(), + vw_htcl::TypeExpr::Generic { name, args, .. } => { + let inner: Vec = args.iter().map(render_type).collect(); + format!("{name}<{}>", inner.join(",")) + } + vw_htcl::TypeExpr::Qualified { + namespace, variant, .. + } => { + format!("{namespace}::{variant}") + } + } +} + +// --- markdown formatters -------------------------------------------------- + +fn format_hover(target: &HoverTarget, proc_doc_comments: &[String]) -> String { + match target { + HoverTarget::ProcDef { proc, .. } => format_proc( + proc.name.as_deref().unwrap_or(""), + proc.signature.as_ref(), + proc_doc_comments, + ), + HoverTarget::CallSite { + proc_name, + signature, + .. + } => format_proc(proc_name, Some(signature), proc_doc_comments), + HoverTarget::ProcArgDef { arg, .. } + | HoverTarget::CallArg { arg, .. } => format_arg(arg), + HoverTarget::LocalVar { name, .. } => format_local_var(name), + HoverTarget::EnumDef { decl, .. } => format_enum(decl), + } +} + +fn format_enum(decl: &vw_htcl::EnumDecl) -> String { + let mut out = String::new(); + let name = decl.name.as_deref().unwrap_or(""); + writeln!(out, "```htcl").unwrap(); + writeln!(out, "enum {name} = {{").unwrap(); + for v in &decl.variants { + match v.payload.as_ref() { + Some(p) => { + writeln!(out, " {}: {}", v.name, render_type(p)).unwrap() + } + None => writeln!(out, " {}", v.name).unwrap(), + } + } + writeln!(out, "}}").unwrap(); + writeln!(out, "```").unwrap(); + out.push_str("\nTagged sum type. 
The compiler auto-generates "); + out.push_str("constructors (`::`), repr, and "); + out.push_str("`tag`/`payload` accessors. See "); + out.push_str("docs/htcl-enums.md for the full semantics.\n"); + out +} + +fn format_local_var(name: &str) -> String { + let mut out = String::new(); + writeln!(out, "```htcl").unwrap(); + writeln!(out, "${name}").unwrap(); + writeln!(out, "```").unwrap(); + out.push_str("\nLocal variable.\n"); + out +} + +fn format_proc( + name: &str, + signature: Option<&ProcSignature>, + proc_doc_comments: &[String], +) -> String { + let mut out = String::new(); + writeln!(out, "```htcl").unwrap(); + // Include the return type in the proc header when annotated: + // proc foo → string + // Unannotated procs render unchanged (`proc foo`). + let return_ty = signature.and_then(|s| s.return_type.as_ref()); + match return_ty { + Some(ty) => { + writeln!(out, "proc {name} → {}", render_type(ty)).unwrap(); + } + None => { + writeln!(out, "proc {name}").unwrap(); + } + } + writeln!(out, "```").unwrap(); + let reflowed = vw_htcl::doc::reflow_doc_comments(proc_doc_comments); + if !reflowed.is_empty() { + out.push('\n'); + out.push_str(&reflowed); + out.push('\n'); + } + if let Some(sig) = signature { + if !sig.args.is_empty() { + out.push_str("\n### Parameters\n\n"); + for arg in &sig.args { + match arg.type_annotation.as_ref() { + Some(ty) => { + write!(out, "- `-{}: {}`", arg.name, render_type(ty)) + .unwrap(); + } + None => { + write!(out, "- `-{}`", arg.name).unwrap(); + } + } + let reflowed = + vw_htcl::doc::reflow_doc_comments(&arg.doc_comments); + let mut paragraphs = reflowed.split("\n\n"); + if let Some(brief) = paragraphs.next().filter(|s| !s.is_empty()) + { + write!(out, " — {brief}").unwrap(); + } + out.push('\n'); + for extra in paragraphs.filter(|s| !s.is_empty()) { + writeln!(out, " {extra}").unwrap(); + } + for attr in &arg.attributes { + writeln!(out, " - `{}`", format_attribute(attr)).unwrap(); + } + } + } + } + out +} + +fn 
format_arg(arg: &ProcArg) -> String { + let mut out = String::new(); + writeln!(out, "```htcl").unwrap(); + match arg.type_annotation.as_ref() { + Some(ty) => { + writeln!(out, "-{}: {}", arg.name, render_type(ty)).unwrap() + } + None => writeln!(out, "-{}", arg.name).unwrap(), + } + writeln!(out, "```").unwrap(); + let reflowed = vw_htcl::doc::reflow_doc_comments(&arg.doc_comments); + if !reflowed.is_empty() { + out.push('\n'); + out.push_str(&reflowed); + out.push('\n'); + } + if !arg.attributes.is_empty() { + out.push('\n'); + for attr in &arg.attributes { + writeln!(out, "- `{}`", format_attribute(attr)).unwrap(); + } + } + out +} + +fn format_attribute(attr: &Attribute) -> String { + if attr.values.is_empty() { + format!("@{}", attr.name) + } else { + let values: Vec = + attr.values.iter().map(format_attribute_value).collect(); + format!("@{}({})", attr.name, values.join(", ")) + } +} + +fn format_attribute_value(v: &AttributeValue) -> String { + match v { + AttributeValue::Integer { value, .. } => value.to_string(), + AttributeValue::Ident { value, .. } => value.clone(), + AttributeValue::String { value, .. 
} => format!("\"{value}\""), + } +} + +fn lc_to_pos(lc: LineCol) -> Position { + Position { + line: lc.line, + character: lc.character, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn uri() -> Url { + Url::parse("file:///tmp/x.htcl").unwrap() + } + + #[tokio::test] + async fn handles_htcl_extension() { + let backend = HtclBackend::new(); + assert!(backend.handles(&uri())); + assert!(!backend.handles(&Url::parse("file:///tmp/x.vhd").unwrap())); + } + + #[tokio::test] + async fn diagnostics_for_unterminated_string() { + let backend = HtclBackend::new(); + backend + .set_text(uri(), "puts \"oops\nputs ok\n".into()) + .await; + let diags = backend.diagnostics(&uri()).await; + assert!(!diags.is_empty(), "expected at least one diagnostic"); + assert_eq!(diags[0].severity, Some(DiagnosticSeverity::ERROR)); + assert!(diags[0].message.contains("unterminated string")); + } + + #[tokio::test] + async fn document_symbols_include_proc() { + let backend = HtclBackend::new(); + backend + .set_text( + uri(), + "## greet someone\nproc greet {name} { puts hi }\n".into(), + ) + .await; + let symbols = backend.document_symbols(&uri()).await; + assert_eq!(symbols.len(), 1); + assert_eq!(symbols[0].name, "greet"); + assert_eq!(symbols[0].kind, SymbolKind::FUNCTION); + assert_eq!(symbols[0].detail.as_deref(), Some("greet someone")); + } + + #[tokio::test] + async fn validator_diagnostics_surface_in_lsp() { + let backend = HtclBackend::new(); + backend + .set_text( + uri(), + "proc axis {\n @enum(1, 2, 4) width\n} { }\n\ + axis -width 3\n" + .into(), + ) + .await; + let diags = backend.diagnostics(&uri()).await; + assert!( + diags.iter().any(|d| d.message.contains("@enum")), + "{:?}", + diags + ); + } + + #[tokio::test] + async fn hover_on_call_site_shows_signature() { + let backend = HtclBackend::new(); + let src = "\ +## Greet someone by name.\n\ +proc greet {\n\ + ## Who to greet.\n\ + @default(\"world\") name\n\ +} { puts \"hi $name\" }\n\ +greet -name there\n"; + 
backend.set_text(uri(), src.into()).await; + // Cursor on the `g` of the call-site `greet`. Line indices + // are 0-based. + let hover = backend + .hover( + &uri(), + Position { + line: 5, + character: 0, + }, + ) + .await + .expect("hover should return content"); + let body = match hover.contents { + HoverContents::Markup(m) => m.value, + _ => panic!("expected markup"), + }; + assert!(body.contains("proc greet"), "{body}"); + assert!(body.contains("Greet someone by name."), "{body}"); + assert!(body.contains("### Parameters"), "{body}"); + assert!(body.contains("-name"), "{body}"); + assert!(body.contains("Who to greet."), "{body}"); + assert!(body.contains("@default"), "{body}"); + } + + #[tokio::test] + async fn hover_on_call_arg_shows_arg_doc() { + let backend = HtclBackend::new(); + let src = "\ +proc greet {\n\ + ## Who to greet.\n\ + @default(\"world\") name\n\ +} { puts hi }\n\ +greet -name there\n"; + backend.set_text(uri(), src.into()).await; + // Position cursor on `-name` of the call site (line 4 in the + // 0-indexed scheme). + let hover = backend + .hover( + &uri(), + Position { + line: 4, + character: 7, + }, + ) + .await + .expect("hover should return content"); + let body = match hover.contents { + HoverContents::Markup(m) => m.value, + _ => panic!("expected markup"), + }; + assert!(body.contains("-name"), "{body}"); + assert!(body.contains("Who to greet."), "{body}"); + assert!(body.contains("@default"), "{body}"); + // Shouldn't include the proc-level header. 
+ assert!(!body.contains("### Parameters"), "{body}"); + } + + #[tokio::test] + async fn hover_outside_known_construct_returns_none() { + let backend = HtclBackend::new(); + backend.set_text(uri(), "puts hello world\n".into()).await; + let hover = backend + .hover( + &uri(), + Position { + line: 0, + character: 0, + }, + ) + .await; + assert!(hover.is_none()); + } + + #[tokio::test] + async fn goto_definition_jumps_call_to_proc_decl() { + let backend = HtclBackend::new(); + let src = "\ +proc greet {\n name\n} { puts hi }\n\ +greet -name there\n"; + backend.set_text(uri(), src.into()).await; + // Cursor on the `g` of the call-site `greet` (line 3). + let locs = backend + .goto_definition( + &uri(), + Position { + line: 3, + character: 0, + }, + ) + .await; + assert_eq!(locs.len(), 1); + // Decl name `greet` is on line 0 at character 5. + assert_eq!(locs[0].range.start.line, 0); + assert_eq!(locs[0].range.start.character, 5); + } + + #[tokio::test] + async fn goto_definition_resolves_attribute_ident() { + let backend = HtclBackend::new(); + let src = "\ +proc f {\n has_a\n @requires(has_a) has_b\n} { }\n"; + backend.set_text(uri(), src.into()).await; + // Cursor on `has_a` inside `@requires(has_a)`. + let locs = backend + .goto_definition( + &uri(), + Position { + line: 2, + character: 13, + }, + ) + .await; + assert_eq!(locs.len(), 1); + // Decl `has_a` is on line 1 at character 2. + assert_eq!(locs[0].range.start.line, 1); + assert_eq!(locs[0].range.start.character, 2); + } + + #[tokio::test] + async fn completion_offers_proc_names_in_command_position() { + let backend = HtclBackend::new(); + let src = "\ +proc greet {} { }\n\ +proc grumble {} { }\n\ +gr\n"; + backend.set_text(uri(), src.into()).await; + // Cursor at end of `gr` on line 2. 
+ let items = backend + .completion( + &uri(), + Position { + line: 2, + character: 2, + }, + ) + .await; + let mut labels: Vec = + items.iter().map(|i| i.label.clone()).collect(); + labels.sort(); + assert_eq!(labels, vec!["greet", "grumble"]); + assert_eq!(items[0].kind, Some(CompletionItemKind::FUNCTION)); + } + + #[tokio::test] + async fn completion_offers_flags_in_argument_position() { + let backend = HtclBackend::new(); + let src = "\ +proc cfg {\n width\n depth\n} { }\n\ +cfg \n"; + backend.set_text(uri(), src.into()).await; + // Line 4, just after `cfg ` (character 4). + let items = backend + .completion( + &uri(), + Position { + line: 4, + character: 4, + }, + ) + .await; + let mut labels: Vec = + items.iter().map(|i| i.label.clone()).collect(); + labels.sort(); + assert_eq!(labels, vec!["-depth", "-width"]); + assert_eq!(items[0].kind, Some(CompletionItemKind::FIELD)); + } + + #[tokio::test] + async fn signature_help_highlights_active_parameter() { + let backend = HtclBackend::new(); + let src = "\ +## Configure the bus.\n\ +proc cfg {\n width\n depth\n} { }\n\ +cfg -depth \n"; + backend.set_text(uri(), src.into()).await; + // Line 5, after `cfg -depth ` (character 11). 
+ let help = backend + .signature_help( + &uri(), + Position { + line: 5, + character: 11, + }, + ) + .await + .expect("signature help expected"); + assert_eq!(help.active_parameter, Some(1)); + let info = &help.signatures[0]; + assert!(info.label.starts_with("cfg "), "{}", info.label); + assert_eq!(info.parameters.as_ref().unwrap().len(), 2); + match &info.documentation { + Some(Documentation::MarkupContent(m)) => { + assert!(m.value.contains("Configure the bus."), "{}", m.value); + } + other => panic!("expected markup documentation, got {other:?}"), + } + } + + #[tokio::test] + async fn signature_help_includes_return_type_arrow() { + let backend = HtclBackend::new(); + let src = "\ +proc make_widget {} bd_cell { return foo }\n\ +make_widget \n"; + backend.set_text(uri(), src.into()).await; + let help = backend + .signature_help( + &uri(), + Position { + line: 1, + character: 12, + }, + ) + .await + .expect("signature help expected"); + let info = &help.signatures[0]; + // Label should carry the `→ bd_cell` suffix. + assert!(info.label.contains("→ bd_cell"), "{}", info.label); + } + + #[tokio::test] + async fn hover_on_enum_decl_shows_variants() { + let backend = HtclBackend::new(); + let src = "\ +enum Property = {\n Scalar: string\n Nested: int\n}\n"; + backend.set_text(uri(), src.into()).await; + // Cursor on the enum name (line 0, col 5: 'Property'). + let hover = backend + .hover( + &uri(), + Position { + line: 0, + character: 7, + }, + ) + .await + .expect("hover on enum decl name"); + if let HoverContents::Markup(MarkupContent { value, .. 
}) = + hover.contents + { + assert!(value.contains("enum Property"), "{value}"); + assert!(value.contains("Scalar: string"), "{value}"); + assert!(value.contains("Nested: int"), "{value}"); + } else { + panic!("expected Markup hover"); + } + } + + #[tokio::test] + async fn hover_proc_def_includes_return_type() { + let backend = HtclBackend::new(); + let src = "\ +## Builds a widget.\n\ +proc make_widget {} dict { return {} }\n"; + backend.set_text(uri(), src.into()).await; + // Hover on the proc name `make_widget` at line 1. + let hover = backend + .hover( + &uri(), + Position { + line: 1, + character: 8, + }, + ) + .await + .expect("hover expected on proc def"); + if let HoverContents::Markup(MarkupContent { value, .. }) = + hover.contents + { + assert!( + value.contains("→ dict"), + "expected return type in hover: {value}" + ); + } else { + panic!("expected Markup hover contents"); + } + } + + #[tokio::test] + async fn signature_help_none_outside_call() { + let backend = HtclBackend::new(); + backend.set_text(uri(), "puts hi\n".into()).await; + let help = backend + .signature_help( + &uri(), + Position { + line: 0, + character: 0, + }, + ) + .await; + assert!(help.is_none()); + } + + #[tokio::test] + async fn goto_definition_unknown_returns_empty() { + let backend = HtclBackend::new(); + backend.set_text(uri(), "puts hello\n".into()).await; + let locs = backend + .goto_definition( + &uri(), + Position { + line: 0, + character: 0, + }, + ) + .await; + assert!(locs.is_empty()); + } + + // --- cross-file (workspace view) tests -------------------------------- + + /// Build a temp workspace with a `lib.htcl` defining `greet` and + /// a `main.htcl` that imports it. Returns the backend with both + /// files already opened and the URIs. 
+ async fn temp_workspace_with_import() -> ( + tempfile::TempDir, + HtclBackend, + Url, // main.htcl + Url, // lib.htcl + ) { + let dir = tempfile::tempdir().unwrap(); + let lib_path = dir.path().join("lib.htcl"); + std::fs::write( + &lib_path, + "## Greet someone.\n\ +proc greet {\n ## Who to greet.\n who\n} { puts \"hi $who\" }\n", + ) + .unwrap(); + let main_path = dir.path().join("main.htcl"); + let main_src = "src lib\ngreet -who world\n"; + std::fs::write(&main_path, main_src).unwrap(); + + let backend = HtclBackend::new(); + let main_uri = Url::from_file_path(&main_path).unwrap(); + let lib_uri = Url::from_file_path(&lib_path).unwrap(); + backend.set_text(main_uri.clone(), main_src.into()).await; + (dir, backend, main_uri, lib_uri) + } + + #[tokio::test] + async fn goto_on_src_import_jumps_to_imported_file() { + let (_dir, backend, main_uri, lib_uri) = + temp_workspace_with_import().await; + // Cursor on the `l` of `src lib` (line 0, col 4). + let locs = backend + .goto_definition( + &main_uri, + Position { + line: 0, + character: 4, + }, + ) + .await; + assert_eq!(locs.len(), 1); + assert_eq!(locs[0].uri, lib_uri); + } + + #[tokio::test] + async fn goto_on_call_to_imported_proc_jumps_to_lib() { + let (_dir, backend, main_uri, lib_uri) = + temp_workspace_with_import().await; + // Cursor on `greet` at line 1. + let locs = backend + .goto_definition( + &main_uri, + Position { + line: 1, + character: 0, + }, + ) + .await; + assert_eq!(locs.len(), 1, "{locs:?}"); + assert_eq!(locs[0].uri, lib_uri); + // The declaration of `greet` is on lib.htcl line 1 col 5. + assert_eq!(locs[0].range.start.line, 1); + assert_eq!(locs[0].range.start.character, 5); + } + + #[tokio::test] + async fn completion_in_command_position_lists_imported_procs() { + let (_dir, backend, main_uri, _lib_uri) = + temp_workspace_with_import().await; + // Append a partial proc name at end of file so cursor lands in + // command position. 
+ let new_text = "src lib\ngreet -who world\ngre\n"; + backend.set_text(main_uri.clone(), new_text.into()).await; + let items = backend + .completion( + &main_uri, + Position { + line: 2, + character: 3, + }, + ) + .await; + let labels: Vec<_> = items.iter().map(|i| i.label.as_str()).collect(); + assert!(labels.contains(&"greet"), "labels = {labels:?}"); + } + + #[tokio::test] + async fn hover_on_imported_call_shows_signature() { + let (_dir, backend, main_uri, _lib_uri) = + temp_workspace_with_import().await; + // Hover on `greet` on line 1. + let hover = backend + .hover( + &main_uri, + Position { + line: 1, + character: 0, + }, + ) + .await + .expect("hover"); + let body = match hover.contents { + HoverContents::Markup(m) => m.value, + _ => panic!(), + }; + assert!(body.contains("proc greet"), "{body}"); + assert!(body.contains("Greet someone."), "{body}"); + assert!(body.contains("-who"), "{body}"); + } + + #[tokio::test] + async fn diagnostics_accept_calls_to_imported_procs() { + let (_dir, backend, main_uri, _lib_uri) = + temp_workspace_with_import().await; + // No errors when the call matches the imported signature. + let diags = backend.diagnostics(&main_uri).await; + let errs: Vec<_> = diags + .iter() + .filter(|d| d.severity == Some(DiagnosticSeverity::ERROR)) + .collect(); + assert!(errs.is_empty(), "{errs:?}"); + } + + #[tokio::test] + async fn hover_works_on_call_inside_command_substitution() { + // Mirrors the user's cips.htcl shape: + // src lib + // set cell [greet -who x] + let (_dir, backend, main_uri, _lib_uri) = + temp_workspace_with_import().await; + let new_text = "src lib\nset cell [greet -who x]\n"; + backend.set_text(main_uri.clone(), new_text.into()).await; + // Cursor on `greet` inside the `[ … ]` on line 1. 
+ let hover = backend + .hover( + &main_uri, + Position { + line: 1, + character: 11, + }, + ) + .await + .expect("hover should resolve calls inside `[…]`"); + let body = match hover.contents { + HoverContents::Markup(m) => m.value, + _ => panic!(), + }; + assert!(body.contains("proc greet"), "{body}"); + } + + #[tokio::test] + async fn signature_help_works_on_call_inside_command_substitution() { + let (_dir, backend, main_uri, _lib_uri) = + temp_workspace_with_import().await; + // Cursor right after `greet ` inside `[ … ]`. + let new_text = "src lib\nset cell [greet ]\n"; + backend.set_text(main_uri.clone(), new_text.into()).await; + let help = backend + .signature_help( + &main_uri, + Position { + line: 1, + character: 16, + }, + ) + .await + .expect("signature help inside `[…]`"); + assert!( + help.signatures[0].label.starts_with("greet"), + "{:?}", + help.signatures[0].label + ); + } + + #[tokio::test] + async fn diagnostics_still_flag_wrong_flag_on_imported_call() { + let (_dir, backend, main_uri, _lib_uri) = + temp_workspace_with_import().await; + backend + .set_text(main_uri.clone(), "src lib\ngreet -whoz world\n".into()) + .await; + let diags = backend.diagnostics(&main_uri).await; + assert!( + diags + .iter() + .any(|d| d.message.contains("undefined argument -whoz")), + "{diags:?}" + ); + } +} diff --git a/vw-analyzer/src/lib.rs b/vw-analyzer/src/lib.rs new file mode 100644 index 0000000..0d989e7 --- /dev/null +++ b/vw-analyzer/src/lib.rs @@ -0,0 +1,35 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! `vw analyzer` — multi-language LSP for the vw HDL workflow. +//! +//! The server is built around a [`LanguageBackend`] abstraction even +//! while only [`HtclBackend`] is wired up. This keeps the architectural +//! slot for VHDL (initially a `vhdl_ls` proxy, later a direct Oxide +//! 
VHDL frontend integration) open from day one — see the project +//! plan's "LSP design" section. + +mod backend; +mod htcl_backend; +mod server; +mod src_complete; +mod workspace; + +pub use backend::{LanguageBackend, SymbolInfo}; +pub use htcl_backend::HtclBackend; +pub use server::Analyzer; + +use tower_lsp::{LspService, Server}; + +/// Run the LSP server on stdio. Returns when the editor disconnects. +/// +/// Both the standalone `vw-analyzer` binary and the `vw analyzer` +/// subcommand call this so the editor sees identical behavior either +/// way. +pub async fn run_stdio() { + let stdin = tokio::io::stdin(); + let stdout = tokio::io::stdout(); + let (service, socket) = LspService::new(Analyzer::new); + Server::new(stdin, stdout, socket).serve(service).await; +} diff --git a/vw-analyzer/src/main.rs b/vw-analyzer/src/main.rs new file mode 100644 index 0000000..c349930 --- /dev/null +++ b/vw-analyzer/src/main.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! `vw-analyzer` binary entry point. +//! +//! Spawns the LSP server on stdio. The editor (or `vw analyzer` +//! subcommand) exec's this binary directly. + +#[tokio::main] +async fn main() { + // Silent by default — Helix and most LSP clients flag any stderr + // output from a language server as an error. Opt in with + // `VW_ANALYZER_LOG=info` (or `debug`/`trace`) for development. + // ANSI off so colors don't show up as escape codes in the + // client's log viewer. 
+ tracing_subscriber::fmt() + .with_writer(std::io::stderr) + .with_ansi(false) + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_env("VW_ANALYZER_LOG") + .unwrap_or_else(|_| "vw_analyzer=off".into()), + ) + .init(); + + vw_analyzer::run_stdio().await; +} diff --git a/vw-analyzer/src/server.rs b/vw-analyzer/src/server.rs new file mode 100644 index 0000000..f4cca1b --- /dev/null +++ b/vw-analyzer/src/server.rs @@ -0,0 +1,206 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! LSP server entry point. Owns the per-language backends and +//! dispatches `textDocument/*` requests by URI. + +use std::sync::Arc; + +use tower_lsp::jsonrpc::Result; +use tower_lsp::lsp_types::*; +use tower_lsp::{Client, LanguageServer}; +use tracing::{debug, info}; + +use crate::backend::LanguageBackend; +use crate::htcl_backend::HtclBackend; + +pub struct Analyzer { + client: Client, + backends: Vec>, +} + +impl Analyzer { + pub fn new(client: Client) -> Self { + let backends: Vec> = + vec![Arc::new(HtclBackend::new())]; + Self { client, backends } + } + + fn backend_for(&self, uri: &Url) -> Option> { + self.backends.iter().find(|b| b.handles(uri)).cloned() + } + + async fn publish_diagnostics(&self, uri: Url, version: Option) { + let Some(backend) = self.backend_for(&uri) else { + return; + }; + let diags = backend.diagnostics(&uri).await; + self.client.publish_diagnostics(uri, diags, version).await; + } +} + +#[tower_lsp::async_trait] +impl LanguageServer for Analyzer { + async fn initialize( + &self, + _params: InitializeParams, + ) -> Result { + info!("vw-analyzer initializing"); + Ok(InitializeResult { + server_info: Some(ServerInfo { + name: "vw-analyzer".into(), + version: Some(env!("CARGO_PKG_VERSION").into()), + }), + capabilities: ServerCapabilities { + text_document_sync: 
Some(TextDocumentSyncCapability::Kind( + TextDocumentSyncKind::FULL, + )), + document_symbol_provider: Some(OneOf::Left(true)), + hover_provider: Some(HoverProviderCapability::Simple(true)), + definition_provider: Some(OneOf::Left(true)), + completion_provider: Some(CompletionOptions { + // `-` opens a flag list; a space after a flag pops + // its `@enum(…)` choices (or the next available + // flags when there are no enum constraints), so + // the user doesn't have to start typing blind to + // discover options. + trigger_characters: Some(vec![ + "-".to_string(), + " ".to_string(), + ]), + ..Default::default() + }), + signature_help_provider: Some(SignatureHelpOptions { + trigger_characters: Some(vec![ + " ".to_string(), + "-".to_string(), + ]), + retrigger_characters: Some(vec!["-".to_string()]), + work_done_progress_options: Default::default(), + }), + ..Default::default() + }, + }) + } + + async fn initialized(&self, _: InitializedParams) { + info!("vw-analyzer initialized"); + } + + async fn shutdown(&self) -> Result<()> { + info!("vw-analyzer shutting down"); + Ok(()) + } + + async fn did_open(&self, params: DidOpenTextDocumentParams) { + let uri = params.text_document.uri.clone(); + let version = Some(params.text_document.version); + debug!(%uri, "did_open"); + if let Some(backend) = self.backend_for(&uri) { + backend + .set_text(uri.clone(), params.text_document.text) + .await; + } + self.publish_diagnostics(uri, version).await; + } + + async fn did_change(&self, params: DidChangeTextDocumentParams) { + let uri = params.text_document.uri.clone(); + let version = Some(params.text_document.version); + let Some(backend) = self.backend_for(&uri) else { + return; + }; + // FULL sync: each change is the entire new text. 
+ if let Some(change) = params.content_changes.into_iter().last() { + backend.set_text(uri.clone(), change.text).await; + } + self.publish_diagnostics(uri, version).await; + } + + async fn did_close(&self, params: DidCloseTextDocumentParams) { + let uri = params.text_document.uri; + if let Some(backend) = self.backend_for(&uri) { + backend.close(&uri).await; + } + } + + async fn document_symbol( + &self, + params: DocumentSymbolParams, + ) -> Result> { + let uri = params.text_document.uri; + let Some(backend) = self.backend_for(&uri) else { + return Ok(None); + }; + let symbols = backend.document_symbols(&uri).await; + if symbols.is_empty() { + Ok(None) + } else { + Ok(Some(DocumentSymbolResponse::Nested(symbols))) + } + } + + async fn hover(&self, params: HoverParams) -> Result> { + let uri = params.text_document_position_params.text_document.uri; + let position = params.text_document_position_params.position; + let Some(backend) = self.backend_for(&uri) else { + return Ok(None); + }; + Ok(backend.hover(&uri, position).await) + } + + async fn goto_definition( + &self, + params: GotoDefinitionParams, + ) -> Result> { + let uri = params + .text_document_position_params + .text_document + .uri + .clone(); + let position = params.text_document_position_params.position; + let Some(backend) = self.backend_for(&uri) else { + return Ok(None); + }; + let locs = backend.goto_definition(&uri, position).await; + if locs.is_empty() { + Ok(None) + } else { + Ok(Some(GotoDefinitionResponse::Array(locs))) + } + } + + async fn completion( + &self, + params: CompletionParams, + ) -> Result> { + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + let Some(backend) = self.backend_for(&uri) else { + return Ok(None); + }; + let items = backend.completion(&uri, position).await; + if items.is_empty() { + Ok(None) + } else { + Ok(Some(CompletionResponse::Array(items))) + } + } + + async fn signature_help( + &self, + params: 
SignatureHelpParams, + ) -> Result> { + let uri = params + .text_document_position_params + .text_document + .uri + .clone(); + let position = params.text_document_position_params.position; + let Some(backend) = self.backend_for(&uri) else { + return Ok(None); + }; + Ok(backend.signature_help(&uri, position).await) + } +} diff --git a/vw-analyzer/src/src_complete.rs b/vw-analyzer/src/src_complete.rs new file mode 100644 index 0000000..09059dc --- /dev/null +++ b/vw-analyzer/src/src_complete.rs @@ -0,0 +1,367 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Filesystem-aware completion for `src` import paths. +//! +//! The `vw-htcl` crate stays free of filesystem concerns, so this +//! lives in the analyzer alongside the workspace resolver. When the +//! cursor sits in the path-position of a `src` command, we +//! enumerate the directory implied by the partial path and offer: +//! +//! - every `.htcl` file at that level, labelled by basename (no +//! extension), and +//! - every subdirectory at that level that transitively contains at +//! least one `.htcl` file, labelled with a trailing `/`. +//! +//! Three flavors of partial are recognized, matching +//! [`vw_htcl::src_path::classify`]: +//! +//! - `@/...` — resolve against the workspace dependency's cached +//! root. +//! - `/abs/...` — filesystem-absolute. +//! - anything else — relative to the importing file's directory. +//! +//! When the partial is just `@` or `@` (no `/` yet), suggest +//! dependency names from the workspace resolver instead. 
+ +use std::path::{Path, PathBuf}; + +use tower_lsp::lsp_types::{ + CompletionItem, CompletionItemKind, CompletionTextEdit, InsertTextFormat, + Position, Range, TextEdit, +}; +use vw_htcl::cmdline::CmdLine; +use vw_htcl::src_path::{classify, PathKind}; +use vw_htcl::{LineCol, LineIndex, Resolver, Span}; + +/// True when the cursor sits in the path-position of a `src` command +/// (i.e. the first complete word is `src` and we're typing the path). +pub fn is_src_path_context(line: &CmdLine<'_>) -> bool { + line.words.first().copied() == Some("src") && line.words.len() == 1 +} + +/// Generate path completions for `line.partial`, treating it as the +/// `` of `src `. +/// +/// `entry_file` is the open file's path on disk; it anchors relative +/// imports and lets us walk up to the workspace's `vw.toml` for dep +/// resolution. `line_index` maps source offsets to LSP positions. +pub fn src_path_completions( + entry_file: &Path, + line: &CmdLine<'_>, + line_index: &LineIndex, + resolver: &Resolver, +) -> Vec { + let partial = line.partial; + + // `@` with no `/` yet → dep-name completion. Replace the + // whole partial with `@/` so the next completion fires on + // the contents. + if let Some(prefix_after_at) = partial.strip_prefix('@') { + if !prefix_after_at.contains('/') { + return dep_name_completions( + resolver, + prefix_after_at, + line.partial_span, + line_index, + ); + } + } + + // Otherwise: resolve the directory the partial points into, then + // enumerate it. + let Some((dir, segment_start)) = resolve_dir(entry_file, resolver, partial) + else { + return Vec::new(); + }; + let segment = &partial[segment_start..]; + let replace = Span::new( + line.partial_span.start + segment_start as u32, + line.partial_span.end, + ); + enumerate_entries(&dir, segment, replace, line_index) +} + +/// Resolve the *directory* part of `partial` to an on-disk path, plus +/// the byte offset into `partial` where the trailing (still-being- +/// typed) segment begins. 
Returns `None` when the partial points at a +/// dep that doesn't exist or a path that can't be classified. +fn resolve_dir( + entry_file: &Path, + resolver: &Resolver, + partial: &str, +) -> Option<(PathBuf, usize)> { + let kind = classify(partial).kind; + let (base, body) = match &kind { + PathKind::Relative => { + let dir = entry_file.parent()?.to_path_buf(); + (dir, partial) + } + PathKind::Absolute => { + (PathBuf::from("/"), partial.trim_start_matches('/')) + } + PathKind::Named { name, subpath } => { + let root = resolver.dep_root(name)?.to_path_buf(); + (root, subpath.as_str()) + } + }; + // Split `body` at its last `/`: everything before is the + // sub-directory walk; everything after is the segment being typed + // (used for the replace range and ignored for enumeration). + let (subdir, trailing_segment) = match body.rfind('/') { + Some(i) => (&body[..i], &body[i + 1..]), + None => ("", body), + }; + let mut dir = base; + if !subdir.is_empty() { + dir.push(subdir); + } + let segment_start = partial.len() - trailing_segment.len(); + Some((dir, segment_start)) +} + +fn enumerate_entries( + dir: &Path, + segment: &str, + replace: Span, + line_index: &LineIndex, +) -> Vec { + let Ok(entries) = std::fs::read_dir(dir) else { + return Vec::new(); + }; + let _ = segment; // LSP client filters by prefix; we list everything. 
+ + let mut out: Vec<(String, CompletionItemKind)> = Vec::new(); + for entry in entries.flatten() { + let path = entry.path(); + let Some(name) = path.file_name().and_then(|n| n.to_str()) else { + continue; + }; + if name.starts_with('.') { + continue; + } + let ft = entry.file_type().ok(); + if ft.is_some_and(|t| t.is_dir()) { + if dir_has_htcl(&path) { + out.push((format!("{name}/"), CompletionItemKind::FOLDER)); + } + } else if path.extension().and_then(|s| s.to_str()) == Some("htcl") { + let stem = + path.file_stem().and_then(|s| s.to_str()).unwrap_or(name); + // `module.htcl` is the dep's default entry point, already + // reachable as bare `@` — listing it here as + // `@/module` would just be a noisier alias. + if stem == vw_htcl::src_path::DEFAULT_MODULE { + continue; + } + out.push((stem.to_string(), CompletionItemKind::FILE)); + } + } + out.sort_by(|a, b| a.0.cmp(&b.0)); + let range = lsp_range(replace, line_index); + out.into_iter() + .map(|(label, kind)| build_item(label, kind, range)) + .collect() +} + +fn dep_name_completions( + resolver: &Resolver, + _prefix: &str, + partial_span: Span, + line_index: &LineIndex, +) -> Vec { + let mut deps: Vec<(&str, &Path)> = resolver.deps().collect(); + deps.sort_by_key(|(n, _)| *n); + let range = lsp_range(partial_span, line_index); + // Bare `@` is a complete import on its own (resolves to the + // dep's `module.htcl`), so don't append a trailing `/` — that + // would leave behind invalid syntax for a user who just wanted + // the default module. Users who want to drill in still type `/` + // themselves, which retriggers completion against the dep root. + deps.into_iter() + .map(|(name, _)| { + build_item(format!("@{name}"), CompletionItemKind::MODULE, range) + }) + .collect() +} + +/// True if `dir` contains, or transitively contains, any `.htcl` file. +/// Short-circuits on the first hit. 
/// True if `dir` contains, directly or in any nested sub-directory, at
/// least one visible `.htcl` file. Stops at the first hit.
///
/// Dot-prefixed entries (files *and* directories) are ignored. This
/// mirrors `enumerate_entries`, which never lists dot-prefixed names:
/// a folder whose only `.htcl` files are hidden must not be offered as
/// a completion, because drilling into it would show nothing.
///
/// An unreadable or missing `dir` counts as "no htcl" and returns
/// `false`; entries whose file type cannot be read are skipped.
fn dir_has_htcl(dir: &Path) -> bool {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return false;
    };
    for entry in entries.flatten() {
        let path = entry.path();
        // Skip hidden entries up front: keeps `.git`, `.svn`, etc. out
        // of the walk and keeps hidden files from counting as a hit.
        let hidden = path
            .file_name()
            .and_then(|n| n.to_str())
            .is_some_and(|n| n.starts_with('.'));
        if hidden {
            continue;
        }
        let Ok(ft) = entry.file_type() else { continue };
        if ft.is_file() {
            if path.extension().and_then(|s| s.to_str()) == Some("htcl") {
                return true;
            }
        } else if ft.is_dir() && dir_has_htcl(&path) {
            return true;
        }
    }
    false
}
fs::write(dep.join("cmd/foo.htcl"), "# stub").unwrap(); + fs::write(dep.join("scripts/notes.txt"), "not htcl").unwrap(); + fs::write(dep.join("ip/bd/cell.htcl"), "# stub").unwrap(); + // entry file + let entry = dir.path().join("prime.htcl"); + fs::write(&entry, "src @amd-htcl/cmd\n").unwrap(); + let resolver = Resolver::new().with_dep("amd-htcl", dep); + // hold dir handle so files persist for the test + (dir, entry, resolver) + } + + fn labels_for(src: &str, entry: &Path, resolver: &Resolver) -> Vec { + let line = cmdline::analyze(src, src.len() as u32); + let idx = LineIndex::new(src); + let items = src_path_completions(entry, &line, &idx, resolver); + let mut labels: Vec = + items.into_iter().map(|c| c.label).collect(); + labels.sort(); + labels + } + + #[test] + fn lists_dep_root_after_trailing_slash() { + let (_dir, entry, resolver) = workspace_fixture(); + let labels = labels_for("src @amd-htcl/", &entry, &resolver); + // .htcl files: cmd, ip. dirs with .htcl: cmd/, ip/. + // scripts/ is omitted (no .htcl inside). + assert_eq!(labels, vec!["cmd", "cmd/", "ip", "ip/"]); + } + + #[test] + fn lists_dep_subdirectory() { + let (_dir, entry, resolver) = workspace_fixture(); + let labels = labels_for("src @amd-htcl/ip/", &entry, &resolver); + // ip/ has bd/ (containing cell.htcl) — bd/ should show; no other entries. + assert_eq!(labels, vec!["bd/"]); + } + + #[test] + fn partial_segment_replaces_only_the_segment() { + // User has typed `src @amd-htcl/c` — replace should cover just + // the `c`, not the whole `@amd-htcl/c`. + let src = "src @amd-htcl/c"; + let (_dir, entry, resolver) = workspace_fixture(); + let line = cmdline::analyze(src, src.len() as u32); + let idx = LineIndex::new(src); + let items = src_path_completions(&entry, &line, &idx, &resolver); + let labels: Vec = + items.iter().map(|c| c.label.clone()).collect(); + // Both `cmd` and `cmd/` start with `c`. 
+ assert!(labels.contains(&"cmd".to_string()), "{labels:?}"); + // The text-edit range should cover only the `c` (single char on line 0). + let edit = match items[0].text_edit.as_ref() { + Some(CompletionTextEdit::Edit(e)) => e, + _ => panic!("expected text edit"), + }; + assert_eq!(edit.range.start.character, 14, "{:?}", edit.range); + assert_eq!(edit.range.end.character, 15); + } + + #[test] + fn dep_name_completion_when_no_slash_yet() { + // Bare `@` is a complete import on its own, so the + // completion shouldn't append `/` — selecting `@amd-htcl` + // alone should leave valid syntax that resolves to + // `/module.htcl`. + let (_dir, entry, resolver) = workspace_fixture(); + let labels = labels_for("src @", &entry, &resolver); + assert_eq!(labels, vec!["@amd-htcl"]); + } + + #[test] + fn dep_root_listing_hides_module_htcl() { + // `module.htcl` is the default entry — already importable as + // bare `@amd-htcl`, so it should not show up as `module` in + // the per-dep file listing. + let (_dir, entry, resolver) = workspace_fixture(); + let labels = labels_for("src @amd-htcl/", &entry, &resolver); + assert!(!labels.contains(&"module".to_string()), "{labels:?}"); + // Sanity: the non-default modules still show. 
+ assert!(labels.contains(&"cmd".to_string())); + assert!(labels.contains(&"ip".to_string())); + } + + #[test] + fn relative_completion_uses_entry_directory() { + let dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("ip")).unwrap(); + fs::write(dir.path().join("ip/cips.htcl"), "# stub").unwrap(); + let entry = dir.path().join("prime.htcl"); + fs::write(&entry, "src ip/\n").unwrap(); + let resolver = Resolver::new(); + let labels = labels_for("src ip/", &entry, &resolver); + assert_eq!(labels, vec!["cips"]); + } +} diff --git a/vw-analyzer/src/workspace.rs b/vw-analyzer/src/workspace.rs new file mode 100644 index 0000000..0a6df6a --- /dev/null +++ b/vw-analyzer/src/workspace.rs @@ -0,0 +1,206 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Workspace-aware helpers for the analyzer. +//! +//! The bare LSP backend deals with one file at a time. Cross-file +//! features — goto-definition into an imported module, completion of +//! procs defined in `@dep/foo`, validating a call against a signature +//! that lives elsewhere — need a view that spans the importing file +//! plus everything it pulled in via `src`. +//! +//! This module computes that view on demand. It's deliberately +//! re-computed per query rather than cached: htcl files are tiny next +//! to a Vivado IP wrapper, the LSP's edits-per-second is modest, and a +//! cache would have to deal with invalidation when an `@dep/...` +//! file on disk changes. A targeted cache is a sensible follow-up once +//! the access pattern is settled. + +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; + +use camino::{Utf8Path, Utf8PathBuf}; +use tower_lsp::lsp_types::Url; + +use vw_htcl::{parse, CommandKind, Resolver, SrcImport, Stmt}; + +/// A flattened source view used for cross-file analysis. 
+/// +/// `view_source` is the local file's text *first* (so the cursor's +/// byte offset in the open document is the same offset in the view — +/// hover/goto/etc. don't need offset translation for the local file), +/// followed by every transitively imported file's text concatenated. +/// Each appended region is recorded in [`imports`](Self::imports) so +/// spans landing there can be mapped back to the file they came from. +pub struct WorkspaceView { + pub view_source: String, + /// Byte length of the *local* file's contribution. Spans whose + /// `start < local_len` belong to the open file; everything past + /// that lives in some imported file. + pub local_len: u32, + pub imports: Vec, +} + +pub struct ImportRegion { + /// Inclusive start offset in `view_source`. + pub start: u32, + /// Exclusive end offset in `view_source`. + pub end: u32, + pub file_uri: Url, +} + +impl WorkspaceView { + /// If `offset` lies inside an imported file's region, return the + /// import region plus the file-local offset of that span; `None` + /// means the offset is in the open file itself. + pub fn locate(&self, offset: u32) -> Option<(&ImportRegion, u32)> { + if offset < self.local_len { + return None; + } + self.imports + .iter() + .find(|r| offset >= r.start && offset < r.end) + .map(|r| (r, offset - r.start)) + } +} + +/// Build a workspace view by reading every file the entry transitively +/// `src`s. Returns a view with `imports` empty when the entry can't be +/// resolved to a filesystem path or has no imports — the analyzer can +/// still use it; it just won't see anything cross-file. 
+pub fn build_view(file_uri: &Url, local_text: &str) -> WorkspaceView { + let mut view = WorkspaceView { + view_source: local_text.to_string(), + local_len: local_text.len() as u32, + imports: Vec::new(), + }; + + let Ok(file_path) = file_uri.to_file_path() else { + return view; + }; + let parent = file_path + .parent() + .map(Path::to_path_buf) + .unwrap_or_else(|| PathBuf::from(".")); + let resolver = build_resolver(&file_path); + + let mut loaded: HashSet = HashSet::new(); + if let Ok(canonical) = file_path.canonicalize() { + loaded.insert(canonical); + } + let mut queue: Vec<(PathBuf, String)> = Vec::new(); + collect_imports(local_text, &parent, &resolver, &mut loaded, &mut queue); + + while let Some((path, text)) = queue.pop() { + view.view_source.push('\n'); + // Record `start` *after* the separator so a span's local + // offset within the imported file is `span.start - start` + // with no off-by-one for the inserted newline. + let start = view.view_source.len() as u32; + view.view_source.push_str(&text); + let end = view.view_source.len() as u32; + if let Ok(import_uri) = Url::from_file_path(&path) { + view.imports.push(ImportRegion { + start, + end, + file_uri: import_uri, + }); + } + // Recurse into this file's own imports. + let import_parent = path + .parent() + .map(Path::to_path_buf) + .unwrap_or_else(|| PathBuf::from(".")); + collect_imports( + &text, + &import_parent, + &resolver, + &mut loaded, + &mut queue, + ); + } + + view +} + +/// Build a [`Resolver`] for the workspace that owns `entry_file`, by +/// walking up to find `vw.toml` and pulling dep cache paths through +/// `vw-lib`. Returns an empty resolver when no workspace is found — +/// relative/absolute `src` imports still work; `@name/` ones won't. 
+pub fn build_resolver(entry_file: &Path) -> Resolver { + let mut resolver = Resolver::new(); + let Some(workspace_dir) = find_workspace_dir(entry_file) else { + return resolver; + }; + // Transitive: a library that does `src @other-lib/...` shouldn't + // force every consumer to redeclare `other-lib` in their own + // `vw.toml`. The walker pulls in each dep's own deps so the + // resolver sees the whole graph (Cargo-style first-seen-wins on + // name conflicts). + if let Ok(paths) = vw_lib::transitive_dep_cache_paths(&workspace_dir) { + for (name, path) in paths { + resolver = resolver.with_dep(name, path); + } + } + resolver +} + +/// Walk up from `start`'s parent directory looking for a `vw.toml`. +fn find_workspace_dir(start: &Path) -> Option { + let mut cur = start.parent()?.to_path_buf(); + loop { + if cur.join("vw.toml").exists() { + return Utf8PathBuf::from_path_buf(cur).ok(); + } + cur = cur.parent()?.to_path_buf(); + } +} + +/// Parse `text` and queue each new (not yet seen) `src` resolution as +/// `(canonical_path, file_text)` for the caller to incorporate. +fn collect_imports( + text: &str, + parent_dir: &Path, + resolver: &Resolver, + loaded: &mut HashSet, + queue: &mut Vec<(PathBuf, String)>, +) { + let parsed = parse(text); + for stmt in &parsed.document.stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let CommandKind::Src(SrcImport { + path: Some(raw), .. + }) = &cmd.kind + else { + continue; + }; + let Ok(resolved) = resolver.resolve(parent_dir, raw) else { + continue; + }; + // Resolver already canonicalizes when possible; defensive + // dedup either way. + if !loaded.insert(resolved.clone()) { + continue; + } + let Ok(content) = fs::read_to_string(&resolved) else { + continue; + }; + queue.push((resolved, content)); + } +} + +/// Public helper: resolve the import at `raw` from `entry_file`'s +/// directory. Used by goto-on-import-path so the analyzer can return +/// a Location pointing at the imported file. 
+pub fn resolve_import(entry_file: &Path, raw: &str) -> Option { + let parent = entry_file.parent()?; + build_resolver(entry_file).resolve(parent, raw).ok() +} + +/// Allow `&Utf8Path` callers to canonicalize through us. +#[allow(dead_code)] +pub fn workspace_root(entry_file: &Utf8Path) -> Option { + find_workspace_dir(Path::new(entry_file.as_str())) +} diff --git a/vw-cli/Cargo.toml b/vw-cli/Cargo.toml index c27a1aa..a8ebd05 100644 --- a/vw-cli/Cargo.toml +++ b/vw-cli/Cargo.toml @@ -14,7 +14,15 @@ path = "src/main.rs" [dependencies] vw-lib = { path = "../vw-lib" } +vw-htcl = { path = "../vw-htcl" } +vw-eda = { path = "../vw-eda" } +vw-vivado = { path = "../vw-vivado" } +vw-analyzer = { path = "../vw-analyzer" } +vw-ip = { path = "../vw-ip" } +vw-htcl-cmd = { path = "../vw-htcl-cmd" } +vw-repl = { path = "../vw-repl" } clap = { version = "4.0", features = ["derive"] } colored = "2.0" tokio.workspace = true camino.workspace = true +tracing-subscriber.workspace = true diff --git a/vw-cli/src/main.rs b/vw-cli/src/main.rs index ce4386b..8877549 100644 --- a/vw-cli/src/main.rs +++ b/vw-cli/src/main.rs @@ -2,13 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand, ValueEnum}; use colored::*; use std::collections::HashSet; use std::fmt; use std::process; +use vw_eda::EdaBackend; use vw_lib::{ add_dependency_with_token, clear_cache, extract_hostname_from_repo_url, generate_deps_tcl, get_access_credentials_from_netrc, init_workspace, @@ -130,6 +131,116 @@ enum Commands { )] scaffold: bool, }, + #[command(about = "Run an htcl script against a Vivado worker")] + Run { + #[arg(help = "Path to an .htcl source file")] + file: Utf8PathBuf, + #[arg( + long, + help = "Parse and print diagnostics only; don't launch Vivado" + )] + check: bool, + #[arg( + short, + long, + help = "Forward Vivado's banner and info messages to stderr" + )] + verbose: bool, + }, + #[command(about = "Launch the vw analyzer LSP server on stdio")] + Analyzer, + #[command( + about = "Interactive htcl REPL backed by a long-lived Vivado worker" + )] + Repl { + #[arg( + short, + long, + help = "Forward Vivado's banner / info chatter to scrollback" + )] + verbose: bool, + #[arg( + long = "load", + value_name = "FILE", + help = "Source FILE into the session as soon as Vivado is up" + )] + initial_load: Option, + }, + #[command( + about = "Parse and run analysis on htcl files without executing them" + )] + Check { + #[arg(required = true, help = "One or more .htcl source files")] + files: Vec, + }, + #[command(subcommand, about = "IP-XACT tooling")] + Ip(IpCommand), + #[command( + subcommand, + name = "htcl-cmd", + about = "Generate htcl wrappers from Vivado command references" + )] + HtclCmd(HtclCmdCommand), +} + +#[derive(Subcommand)] +enum HtclCmdCommand { + #[command( + about = "Generate an htcl wrapper from a Vivado man-page command \ + reference" + )] + Generate { + #[arg(help = "Path to a Vivado man-page file (e.g. 
\ + /doc/eng/man/add_files)")] + input: Utf8PathBuf, + #[arg(short, long, help = "Output file (defaults to stdout)")] + output: Option, + #[arg( + long, + help = "Command name to wrap (defaults to the input file stem)" + )] + name: Option, + #[arg( + long, + value_name = "FILE", + help = "Per-command constraint overrides (TOML)" + )] + constraints: Option, + }, +} + +#[derive(Subcommand)] +enum IpCommand { + #[command(about = "Generate an htcl wrapper from an IP-XACT component")] + Generate { + #[arg(help = "Path to an IP-XACT component XML file")] + input: Utf8PathBuf, + #[arg(short, long, help = "Output file (defaults to stdout)")] + output: Option, + #[arg( + long, + help = "Include parameters whose resolve attribute is not 'user'" + )] + include_internal: bool, + #[arg( + long = "preset", + value_name = "FILE", + help = "Supplementary Vivado preset XML file (`` format). May be given multiple times. The \ + declared values are merged into `@enum(...)` lists in the \ + generated wrapper, on top of the IP-XACT `` \ + entries." + )] + presets: Vec, + #[arg( + long, + help = "Skip auto-discovery of preset files under the Vivado \ + `data/versal/ps_pmc//` tree. Use this if the \ + discovered files are wrong or you only want the explicit \ + `--preset` ones." 
+ )] + no_auto_presets: bool, + }, } /// Helper function to get access credentials for a repository URL from netrc if available @@ -151,9 +262,8 @@ async fn get_access_credentials_for_workspace( // Load workspace config and check if any dependencies might need authentication if let Ok(config) = load_workspace_config(workspace_dir) { for dep in config.dependencies.values() { - if let Some(creds) = - get_access_credentials_for_repo(&dep.repo).await - { + let Some(repo) = dep.repo() else { continue }; + if let Some(creds) = get_access_credentials_for_repo(repo).await { return Some(creds); } } @@ -319,13 +429,14 @@ async fn main() { VersionInfo::Locked { commit } => { format!(" ({})", &commit[..8.min(commit.len())]) } + VersionInfo::Local => " (local)".to_string(), VersionInfo::Unknown => String::new(), }; println!( " {} - {}{}", dep.name.cyan(), - dep.repo, + dep.source, version_info.bright_black() ); } @@ -452,5 +563,734 @@ async fn main() { process::exit(1); } } + Commands::Run { + file, + check, + verbose, + } => { + if let Err(e) = run_htcl(&file, check, verbose).await { + eprintln!("{} {e}", "error:".bright_red()); + process::exit(1); + } + } + Commands::Analyzer => { + init_analyzer_logging(); + vw_analyzer::run_stdio().await; + } + Commands::Repl { + verbose, + initial_load, + } => { + if let Err(e) = vw_repl::run(vw_repl::ReplOptions { + verbose, + initial_load, + }) + .await + { + eprintln!("{} {e}", "error:".bright_red()); + process::exit(1); + } + } + Commands::Check { files } => { + let mut had_errors = false; + for file in &files { + match check_htcl(file).await { + Ok(file_errs) => { + if file_errs { + had_errors = true; + } + } + Err(e) => { + had_errors = true; + eprintln!("{} {file}: {e}", "error:".bright_red()); + } + } + } + if had_errors { + process::exit(1); + } + } + Commands::Ip(cmd) => match cmd { + IpCommand::Generate { + input, + output, + include_internal, + presets, + no_auto_presets, + } => { + if let Err(e) = run_ip_generate( + &input, + 
output.as_deref(), + include_internal, + &presets, + no_auto_presets, + ) { + eprintln!("{} {e}", "error:".bright_red()); + process::exit(1); + } + } + }, + Commands::HtclCmd(cmd) => match cmd { + HtclCmdCommand::Generate { + input, + output, + name, + constraints, + } => { + if let Err(e) = run_htcl_cmd_generate( + &input, + output.as_deref(), + name.as_deref(), + constraints.as_deref(), + ) { + eprintln!("{} {e}", "error:".bright_red()); + process::exit(1); + } + } + }, + } +} + +fn run_htcl_cmd_generate( + input: &Utf8Path, + output: Option<&Utf8Path>, + name: Option<&str>, + constraints_path: Option<&Utf8Path>, +) -> Result<(), String> { + let page = vw_htcl_cmd::load(input.as_std_path(), name) + .map_err(|e| format!("loading {input}: {e}"))?; + let constraints = match constraints_path { + Some(p) => vw_htcl_cmd::ConstraintsTable::load(p.as_std_path()) + .map_err(|e| format!("loading constraints: {e}"))?, + None => vw_htcl_cmd::ConstraintsTable::empty(), + }; + let opts = vw_htcl_cmd::GenerateOptions { + constraints, + ..Default::default() + }; + let text = vw_htcl_cmd::generate(&page, &opts); + match output { + Some(path) => std::fs::write(path, &text) + .map_err(|e| format!("writing {path}: {e}"))?, + None => print!("{text}"), + } + Ok(()) +} + +fn run_ip_generate( + input: &Utf8Path, + output: Option<&Utf8Path>, + include_internal: bool, + explicit_presets: &[Utf8PathBuf], + no_auto_presets: bool, +) -> Result<(), String> { + let component = + vw_ip::load(input).map_err(|e| format!("loading {input}: {e}"))?; + + // Combine explicit `--preset` files with what we can auto-discover + // under Vivado's `data/versal/ps_pmc//` tree. 
+ let mut preset_paths: Vec = explicit_presets + .iter() + .map(|p| std::path::PathBuf::from(p.as_str())) + .collect(); + if !no_auto_presets { + let discovered = + vw_ip::discover_presets(std::path::Path::new(input.as_str())); + for p in discovered { + if !preset_paths.contains(&p) { + preset_paths.push(p); + } + } + } + for p in &preset_paths { + eprintln!("{:>12} {}", "Sourcing".bright_green().bold(), p.display()); + } + let presets = if preset_paths.is_empty() { + vw_ip::PresetMap::new() + } else { + vw_ip::load_presets(&preset_paths) + .map_err(|e| format!("loading presets: {e}"))? + }; + + // Sub-proc schemas for Xilinx `structured_tcldict` parameters + // (PS_PMC_CONFIG, etc.). Empty when the component isn't a CIPS + // and doesn't have an accompanying schema tree. + let dict_schemas = + vw_ip::load_cips_dict_schemas(std::path::Path::new(input.as_str())); + for name in dict_schemas.keys() { + eprintln!( + "{:>12} schema for {name} ({} fields)", + "Loaded".bright_green().bold(), + dict_schemas[name].fields.len() + ); + } + + let opts = vw_ip::GenerateOptions { + user_configurable_only: !include_internal, + ..Default::default() + }; + let text = vw_ip::generate(&component, &presets, &dict_schemas, &opts); + match output { + Some(path) => std::fs::write(path, &text) + .map_err(|e| format!("writing {path}: {e}"))?, + None => print!("{text}"), + } + Ok(()) +} + +/// Read `entry` and recursively resolve its `src` imports. Looks for +/// a `vw.toml` in the entry file's parent chain to discover the +/// workspace; falls back to an empty resolver (so relative/absolute +/// imports still work, but `@name/` imports fail with a clear error) +/// when no workspace is found. +/// +/// A [`CliObserver`] is attached so the loader's progress prints in +/// real time as `Sourcing …` / `Checking …` lines. 
/// Shorten an import path for display: drop one leading `@` and one
/// trailing `.htcl`, so the CLI shows `amd-htcl/cpm5` rather than
/// `@amd-htcl/cpm5.htcl`.
///
/// Exactly one prefix and one suffix are removed. Anything beyond that
/// (for example the inner `.htcl` of `foo.htcl.htcl`) is part of the
/// name the user wrote and is kept, so the displayed label still
/// identifies the import unambiguously.
fn friendly_import(raw: &str) -> String {
    let without_sigil = raw.strip_prefix('@').unwrap_or(raw);
    without_sigil
        .strip_suffix(".htcl")
        .unwrap_or(without_sigil)
        .to_string()
}
+ let filter = tracing_subscriber::EnvFilter::try_from_env("VW_ANALYZER_LOG") + .unwrap_or_else(|_| "vw_analyzer=off".into()); + let _ = tracing_subscriber::fmt() + .with_writer(std::io::stderr) + .with_ansi(false) + .with_env_filter(filter) + .try_init(); +} + +/// Run parse + signature validation on `file`. Returns `Ok(true)` +/// if any error-severity diagnostics were reported, `Ok(false)` for +/// clean. Warnings don't flip the return value but still print. +async fn check_htcl( + file: &camino::Utf8Path, +) -> Result> { + let program = load_htcl_program(file)?; + let parsed = vw_htcl::parse(&program.source); + let validator_diags = vw_htcl::validate(&parsed.document, &program.source); + + // Build a per-file `LineIndex` lazily so we only pay for files + // that actually have diagnostics. Keyed by `file_index`. + let cwd_owned = std::env::current_dir().ok(); + let cwd = cwd_owned.as_deref(); + let mut indices: std::collections::HashMap = + std::collections::HashMap::new(); + + let mut error_count = 0usize; + let mut warning_count = 0usize; + let mut emit = |severity: Option, + message: &str, + span: vw_htcl::Span| { + let level = match severity { + None | Some(vw_htcl::Severity::Error) => { + error_count += 1; + "error:".bright_red() + } + Some(vw_htcl::Severity::Warning) => { + warning_count += 1; + "warning:".bright_yellow() + } + }; + // Map the span back to its originating file's line/col so the + // displayed location is the file the user actually wrote — not + // the flat dependency-concatenated source the loader produced. 
/// Format `path` for display in a diagnostic location.
///
/// When `cwd` is given and `path` lies underneath it, the `cwd` prefix
/// is stripped so the location stays short; in every other case the
/// path is shown exactly as passed in.
fn render_path(
    path: &std::path::Path,
    cwd: Option<&std::path::Path>,
) -> String {
    cwd.and_then(|base| path.strip_prefix(base).ok())
        .unwrap_or(path)
        .display()
        .to_string()
}
Mirrors the REPL's +/// scrollback colors + stack-frame rewriting so both surfaces +/// look the same: +/// +/// - **Stream kind → ANSI color/prefix** +/// - `Error` → `✗ ` red bold +/// - `Warning` → `⚠ ` orange (Rgb 255,140,0) +/// - `Info` → `· ` dark gray +/// - `Stdout` → no prefix, no color +/// +/// - **Stack-frame rewriting**: lines matching ` at :N +/// in ::proc` are mapped to the real htcl source via +/// [`vw_repl::resolve_stack_frames_with`] + `proc_table`. +/// Adjacent frames pointing at the same proc collapse to one. +/// +/// - **Origin tagging**: warnings/errors that arrive without an +/// `\n at …` trace get one appended pointing at the +/// currently-executing top-level statement (`origin`). Mirrors +/// the REPL's `tag_streamed_message` — Vivado C++ paths +/// bypass `::common::send_msg_id` and emit traceless messages +/// we'd otherwise have no anchor for. +fn render_chunk( + kind: vw_vivado::StreamKind, + chunk: &str, + proc_table: &std::collections::HashMap, + origin: Option<&vw_repl::Origin>, + input_file: Option<&std::path::Path>, +) { + use colored::Colorize; + use std::io::Write; + // Drop trailing newlines (`trim_end_matches` strips every + // trailing `\n`, not just one) so the per-message layout + // doesn't insert a blank gap. The shim's `puts` already + // preserves user-side newlines inside the message. + let trimmed = chunk.trim_end_matches('\n'); + if trimmed.is_empty() { + return; + } + let resolved = vw_repl::resolve_stack_frames_with( + trimmed, + |name| proc_table.get(name).cloned(), + input_file, + ); + // Tag traceless warnings/errors with the currently-executing + // statement's origin.
+ let tagged = match kind { + vw_vivado::StreamKind::Warning | vw_vivado::StreamKind::Error + if !resolved.contains("\n at ") => + { + match origin { + Some(o) => { + let path = o + .file + .as_deref() + .map(vw_repl::display_path) + .unwrap_or_else(|| { + input_file + .map(vw_repl::display_path) + .unwrap_or_else(|| "".into()) + }); + format!("{resolved}\n at {path}:{}", o.line) + } + None => resolved, + } + } + _ => resolved, + }; + let prefix = match kind { + vw_vivado::StreamKind::Error => "✗ ", + vw_vivado::StreamKind::Warning => "⚠ ", + vw_vivado::StreamKind::Info => "· ", + vw_vivado::StreamKind::Stdout => "", + }; + let mut out = std::io::stdout().lock(); + for (i, line) in tagged.lines().enumerate() { + let leading = if i == 0 || prefix.is_empty() { + prefix + } else { + " " + }; + let styled_prefix: String = match kind { + vw_vivado::StreamKind::Error => leading.red().bold().to_string(), + vw_vivado::StreamKind::Warning => { + leading.truecolor(255, 140, 0).bold().to_string() + } + vw_vivado::StreamKind::Info => leading.bright_black().to_string(), + vw_vivado::StreamKind::Stdout => leading.to_string(), + }; + let styled_line: String = match kind { + vw_vivado::StreamKind::Error => line.red().to_string(), + vw_vivado::StreamKind::Warning => { + line.truecolor(255, 140, 0).to_string() + } + vw_vivado::StreamKind::Info => line.bright_black().to_string(), + vw_vivado::StreamKind::Stdout => line.to_string(), + }; + let _ = writeln!(out, "{styled_prefix}{styled_line}"); + } + let _ = out.flush(); +} + +/// Evaluate on `backend` every proc that +/// `vw_htcl::emit_repr_with_types` emits for `ty`, in emission order, +/// skipping proc texts already recorded in `emitted`. Returns early +/// with the error of the first eval that fails. `TypeExpr::Qualified` +/// inputs are a no-op. +async fn ship_generic_reprs( + backend: &mut vw_vivado::VivadoBackend, + ty: &vw_htcl::TypeExpr, + types: &std::collections::HashMap, + emitted: &mut std::collections::HashSet, +) -> Result<(), Box> { + // TypeExpr::Qualified appears only on overloaded-handler + // first-args; the validator forbids it anywhere else, and + // codegen doesn't need a repr for it. + if matches!(ty, vw_htcl::TypeExpr::Qualified { ..
}) { + return Ok(()); + } + let emission = vw_htcl::emit_repr_with_types(ty, types); + for p in &emission.procs { + // The procs are emitted in dependency order; the body of + // each instantiation may reference earlier ones in the + // same emission, so we ship them sequentially through + // the same eval channel. + if emitted.insert(p.clone()) { + backend.eval(p).await?; + } + } + Ok(()) +} + +/// Mirror of `vw-repl/src/lower.rs::overload_specialization_mangle`. +/// If `cmd` is a top-level `proc` whose name is an overload public +/// name AND whose first arg is a qualified-variant annotation, +/// return the mangled internal name to lower it under. Keeps +/// `vw run` in step with the REPL's specialization-rerouting. +fn overload_specialization_mangle( + cmd: &vw_htcl::Command, + overloads: &vw_htcl::OverloadTable, +) -> Option { + let vw_htcl::CommandKind::Proc(proc) = &cmd.kind else { + return None; + }; + let name = proc.name.as_deref()?; + if !overloads.contains_key(name) { + return None; + } + let sig = proc.signature.as_ref()?; + let first = sig.args.first()?; + let vw_htcl::TypeExpr::Qualified { variant, .. } = + first.type_annotation.as_ref()? + else { + return None; + }; + Some(vw_htcl::mangle_specialization(name, variant)) +} + +/// Parse the htcl program loaded from `file` and, unless `check_only` +/// is set, run it on a freshly spawned Vivado worker. +/// +/// Parse errors are printed to stderr and turn into an `Err`. With +/// `check_only` the function returns after printing a parsed-OK +/// summary. Otherwise it ships the primitive, enum, overload-dispatcher +/// and generic-repr preludes, then lowers and evals each top-level +/// command in source order. `verbose` is forwarded to `VivadoConfig`. +async fn run_htcl( + file: &camino::Utf8Path, + check_only: bool, + verbose: bool, +) -> Result<(), Box> { + let program = load_htcl_program(file)?; + // Keep `program` alive — the stack-frame rewriting needs the + // LoadedProgram for body-span resolution. We clone `source` + // out of it instead of moving.
+ let source = program.source.clone(); + let parsed = vw_htcl::parse(&source); + let line_index = vw_htcl::LineIndex::new(&source); + + let mut had_errors = false; + for err in &parsed.errors { + had_errors = true; + let (start, _end) = line_index.range(err.span); + eprintln!( + "{} {}:{}:{}: {}", + "error:".bright_red(), + file, + start.line + 1, + start.character + 1, + err.message + ); + } + if had_errors { + return Err(format!( + "{} parse error(s); aborting", + parsed.errors.len() + ) + .into()); + } + + if check_only { + let cmd_count = parsed + .document + .stmts + .iter() + .filter(|s| matches!(s, vw_htcl::Stmt::Command(_))) + .count(); + println!( + "{} {file}: parsed OK ({cmd_count} command(s))", + "✓".bright_green() + ); + return Ok(()); + } + + let mut backend = + vw_vivado::VivadoBackend::spawn(vw_vivado::VivadoConfig { + verbose, + ..Default::default() + }) + .await + .map_err(|e| format!("failed to start Vivado worker: {e}"))?; + + // Build the proc-location table the stream sink uses to map + // Tcl `:N in ::proc` frames back to real htcl source. + // Mirrors what `vw-repl` does per batch — we use the same + // shared helpers (`vw_repl::trace::*`) so REPL and CLI render + // the same. The entry file IS the scratch from build_proc_locations' + // perspective. + let entry_std_path = std::path::Path::new(file.as_str()).to_path_buf(); + let proc_table = std::sync::Arc::new(vw_repl::build_proc_locations( + &parsed.document, + &program, + &entry_std_path, + )); + let input_file_for_stack = std::sync::Arc::new(entry_std_path.clone()); + // Shared between the main loop (writes the current origin + // before each eval) and the stream sink (reads it to tag + // unattributed warnings — e.g. Vivado IP-Flow C++ messages + // that bypass `::common::send_msg_id`). Same trick the REPL + // uses with `pending_origins[pending_eval_index]`.
+ let current_origin = + std::sync::Arc::new(std::sync::Mutex::new(None::)); + { + let procs = std::sync::Arc::clone(&proc_table); + let input_file = std::sync::Arc::clone(&input_file_for_stack); + let origin = std::sync::Arc::clone(¤t_origin); + backend.set_stdout_sink(move |kind, chunk: &str| { + let cur_origin = origin.lock().ok().and_then(|g| g.clone()); + render_chunk( + kind, + chunk, + &procs, + cur_origin.as_ref(), + Some(input_file.as_path()), + ); + }); + } + + // Lower structured proc declarations and call sites to plain Tcl + // before sending. Generic commands pass through unchanged. + let table = vw_htcl::signature_table(&parsed.document); + // Ship enum preludes + overload dispatchers before any user + // statements run — same shape as the REPL's prepare path. + // Without these, calls to `Property::Scalar` or to an + // overloaded `handle` would fail at runtime with `invalid + // command name`. + let mut _ignored = Vec::new(); + let enum_decl_table = + vw_htcl::build_enum_decl_table(&parsed.document, &mut _ignored); + let type_decl_table = + vw_htcl::build_type_decl_table(&parsed.document, &mut _ignored); + let (full_sigs, overload_table) = + vw_htcl::build_signature_table_with_overloads( + &parsed.document, + &mut _ignored, + ); + // Always ship the primitive prelude so user-written newtype + // reprs can call e.g. `string::repr -v $v` for their inner + // values. + for p in vw_htcl::emit_primitive_prelude() { + let _ = backend.eval(&p).await?; + } + for ed in enum_decl_table.values() { + let prelude = vw_htcl::emit_enum_prelude(ed); + if !prelude.trim().is_empty() { + let _ = backend.eval(&prelude).await?; + } + } + for info in overload_table.values() { + let dispatcher = vw_htcl::emit_dispatcher(info); + let _ = backend.eval(&dispatcher).await?; + } + // Ship monomorphized generic reprs for every type expression + // referenced in any proc signature. This covers user newtypes + // that delegate to a generic repr (e.g.
`Properties::repr` + // delegates to `dict_string_Property::repr`); without these + // the user's repr body errors at runtime with `invalid + // command name`. + let mut emitted_generics: std::collections::HashSet = + std::collections::HashSet::new(); + for sig in full_sigs.values() { + if let Some(ret) = sig.return_type.as_ref() { + ship_generic_reprs( + &mut backend, + ret, + &type_decl_table, + &mut emitted_generics, + ) + .await?; + } + for arg in &sig.args { + if let Some(ty) = arg.type_annotation.as_ref() { + ship_generic_reprs( + &mut backend, + ty, + &type_decl_table, + &mut emitted_generics, + ) + .await?; + } + } + } + // NOTE(review): rebuilds the same `LineIndex` already created + // right after parsing; the earlier binding could be reused. + let line_index = vw_htcl::LineIndex::new(&source); + for stmt in &parsed.document.stmts { + let vw_htcl::Stmt::Command(cmd) = stmt else { + continue; + }; + // Snapshot the origin of THIS statement before shipping + // it, so the stream sink can tag any traceless warning + // Vivado emits during the eval with the right "what was + // running" anchor. Mirrors the REPL's pending_origins + + // pending_eval_index mechanism. + { + let (line, _) = line_index.range(cmd.span); + let snippet = source + [cmd.span.start as usize..cmd.span.end as usize] + .lines() + .next() + .unwrap_or("") + .to_string(); + let file_path = program + .locate_span(cmd.span) + .map(|(idx, _)| program.files[idx].path.clone()); + if let Ok(mut g) = current_origin.lock() { + *g = Some(vw_repl::Origin { + file: file_path, + line: line.line + 1, + snippet, + via: Vec::new(), + }); + } + } + // Overload specializations lower under their mangled + // names so the dispatcher's switch arms can find them.
+ let lowered = match overload_specialization_mangle(cmd, &overload_table) + { + Some(mangled) => { + let vw_htcl::CommandKind::Proc(proc) = &cmd.kind else { + unreachable!() + }; + vw_htcl::lower_proc_decl_with_name( + proc, + &source, + &table, + Some(&mangled), + ) + } + None => vw_htcl::lower_command(cmd, &source, &table), + }; + // Rewrite `extern::name` → `::name` (the textual pass the + // REPL also runs) so calls to runtime-Tcl/Vivado procs + // reach Vivado as the bare native name instead of the + // literal `extern::` text — without this, every wrapper + // body that forwards via `extern::` errors out at runtime + // with `invalid command name "extern::create_project"`. + let tcl = vw_htcl::rewrite_externs(&lowered).text; + // NOTE(review): eval failures are only printed; the loop moves + // on to the next statement and `run_htcl` still returns + // `Ok(())` — confirm that a success status after Tcl errors + // is intended. + match backend.eval(&tcl).await { + Ok(out) => { + // Puts output already streamed to stdout via the + // sink; `out.stdout` is empty here by contract. The + // eval's return value gets a newline only when it's + // not already empty. + if !out.value.is_empty() { + println!("{}", out.value); + } + } + Err(vw_eda::BackendError::Tcl { message, ..
}) => { + eprintln!("{} {message}", "vivado:".bright_red()); + } + Err(e) => { + eprintln!("{} {e}", "vivado:".bright_red()); + } + } } + // NOTE(review): the `?` early returns while shipping preludes + // above leave the function without reaching this shutdown call — + // confirm the backend's drop path cleans up the worker. The + // shutdown result itself is discarded. + let _ = backend.shutdown().await; + Ok(()) } diff --git a/vw-eda/Cargo.toml b/vw-eda/Cargo.toml new file mode 100644 index 0000000..f66da6c --- /dev/null +++ b/vw-eda/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "vw-eda" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "EDA backend trait and wire protocol for driving vendor TCL interpreters (Vivado, Quartus, ...)" + +[dependencies] +serde.workspace = true +serde_json.workspace = true +thiserror.workspace = true +tokio.workspace = true +async-trait.workspace = true diff --git a/vw-eda/src/lib.rs b/vw-eda/src/lib.rs new file mode 100644 index 0000000..d86a4b1 --- /dev/null +++ b/vw-eda/src/lib.rs @@ -0,0 +1,105 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! EDA backend abstraction. +//! +//! Defines the trait that every vendor-specific TCL worker implements +//! and the wire protocol used to talk to it. `vw-vivado` is the first +//! implementation; future `vw-quartus` / `vw-synopsys` crates will +//! implement the same trait, and consumers (`vw run`, `vw repl`, the +//! analyzer) talk only to this abstraction. +//! +//! The protocol is intentionally small: newline-delimited JSON +//! requests, one response per request, monotonic IDs. See the project +//! plan's "Wire protocol" section for the design rationale. + +pub mod protocol; + +use async_trait::async_trait; +use thiserror::Error; + +pub use protocol::{ + ErrorPayload, Request, RequestOp, Response, ResponseResult, StreamMessage, + WireMessage, +}; + +/// Errors returned by an [`EdaBackend`].
+#[derive(Debug, Error)] +pub enum BackendError { + /// The worker process exited or could not be started. + #[error("worker process error: {0}")] + Worker(String), + + /// I/O error while reading or writing the wire protocol. + #[error("wire I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Wire protocol message could not be serialized or parsed. + #[error("wire protocol error: {0}")] + Protocol(#[from] serde_json::Error), + + /// The backend reported a TCL-level error in response to a command. + /// `stdout` carries any output the command produced before erroring, + /// so callers can show context. + #[error("TCL error: {message}")] + Tcl { + message: String, + code: Option, + info: Option, + stdout: String, + }, + + /// Catch-all for backend-specific failures. + #[error("{0}")] + Other(String), +} + +/// Result of an [`EdaBackend::eval`] call. +#[derive(Clone, Debug, Default)] +pub struct EvalOutput { + /// The TCL expression's return value, as a string. + pub value: String, + /// stdout captured during this eval (puts to stdout from the user + /// TCL while the shim's capturing flag was set). Always present + /// and may be empty; trailing newlines are preserved as written. + pub stdout: String, +} + +/// A long-lived TCL worker driven by `vw`. +/// +/// Implementations spawn the vendor process (Vivado, Quartus, ...), +/// inject a small shim that speaks the wire protocol, and translate +/// [`Request`]s into [`Response`]s. The trait is intentionally narrow: +/// callers issue commands, the backend runs them, and the protocol is +/// the contract. +#[async_trait] +pub trait EdaBackend: Send { + /// Human-readable backend name, e.g. `"vivado"`. + fn name(&self) -> &str; + + /// Evaluate a TCL command string and return its result plus any + /// stdout the command produced. + /// + /// Equivalent to issuing a [`RequestOp::Eval`] request and + /// extracting both the return value and the captured-puts payload. 
+ /// Most callers should use this in preference to + /// [`EdaBackend::send`] until the structured-eval machinery + /// (phase 4) lands. + async fn eval(&mut self, tcl: &str) -> Result; + + /// Issue an arbitrary request and return the raw response. + /// + /// The default implementation in concrete backends is the + /// preferred place to add `eval_structured` and future ops without + /// changing the trait surface. + async fn send( + &mut self, + request: Request, + ) -> Result; + + /// Cleanly shut the worker down. Backends should make this + /// idempotent so that `Drop` can fall back to it without + /// double-shutdown errors. + async fn shutdown(&mut self) -> Result<(), BackendError>; +} diff --git a/vw-eda/src/protocol.rs b/vw-eda/src/protocol.rs new file mode 100644 index 0000000..77e835d --- /dev/null +++ b/vw-eda/src/protocol.rs @@ -0,0 +1,210 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Newline-delimited JSON wire protocol between `vw` and a vendor +//! TCL worker. +//! +//! v0 implements the `eval` op only. The `eval_structured` op (Phase 4 +//! of the project plan) will land as an additional [`RequestOp`] +//! variant without breaking the wire format. + +use serde::{Deserialize, Serialize}; + +/// A request sent from `vw` to the worker. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Request { + /// Monotonic request id chosen by the sender. The worker echoes + /// it in the matching [`Response`]. + pub id: u64, + #[serde(flatten)] + pub op: RequestOp, +} + +/// The operation a [`Request`] performs. +/// +/// Serialized with `op` as the discriminator (`{"op": "eval", "tcl": +/// "..."}`), matching the project plan's spec. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(tag = "op", rename_all = "snake_case")] +pub enum RequestOp { + /// Evaluate a TCL command in the worker's interpreter and return + /// the result as a string. + Eval { tcl: String }, + /// Cleanly shut the worker down. Issued by [`crate::EdaBackend::shutdown`]. + Shutdown, +} + +/// A response from the worker for a single request. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Response { + /// Id of the request this response answers. + pub id: u64, + /// Success or failure payload, flattened into the same JSON + /// object as `id`. + #[serde(flatten)] + pub result: ResponseResult, +} + +/// Outcome carried by a [`Response`]. +/// +/// Untagged: the variant is picked by which one deserializes. +/// [`OkMarker`] only accepts `ok: true` and [`ErrMarker`] only accepts +/// `ok: false`, so the `ok` field acts as the discriminator. +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ResponseResult { + /// Success. `result` falls back to its default when the field is + /// absent from the message. + Ok { + ok: OkMarker, + #[serde(default)] + result: serde_json::Value, + }, + /// Failure, with the worker-reported error payload. + Err { + ok: ErrMarker, + error: ErrorPayload, + }, +} + +/// Streaming notification emitted by the worker between request and +/// response. `puts` writes from inside an eval are forwarded as these +/// so callers can show output live rather than waiting for the eval +/// to complete (necessary for any long-running synthesis or +/// implementation command). +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct StreamMessage { + /// Id of the in-flight request this stream chunk belongs to. + pub id: u64, + /// `"stdout"` today; reserved for `"stderr"` etc. later. + pub stream: String, + /// The chunk's bytes, including any trailing newline as written. + pub data: String, +} + +/// One wire-level message read from the worker. Either a streaming +/// chunk for an in-flight request, or the request's final response. +/// Discriminated by structural inspection: stream messages have a +/// `stream` field, responses have `ok`. +#[derive(Clone, Debug, Deserialize)] +#[serde(untagged)] +pub enum WireMessage { + Stream(StreamMessage), + Response(Response), +} + +/// Marker that always serializes to the literal `true`. Lets us use +/// the same `ok` field as a discriminator without a custom serializer.
+#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)] +pub struct OkMarker(#[serde(deserialize_with = "deserialize_true")] pub bool); + +impl OkMarker { + pub const TRUE: OkMarker = OkMarker(true); +} + +fn deserialize_true<'de, D: serde::Deserializer<'de>>( + de: D, +) -> Result { + let v = bool::deserialize(de)?; + if v { + Ok(true) + } else { + Err(serde::de::Error::custom("expected `true`")) + } +} + +#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)] +pub struct ErrMarker(#[serde(deserialize_with = "deserialize_false")] pub bool); + +impl ErrMarker { + pub const FALSE: ErrMarker = ErrMarker(false); +} + +fn deserialize_false<'de, D: serde::Deserializer<'de>>( + de: D, +) -> Result { + let v = bool::deserialize(de)?; + if !v { + Ok(false) + } else { + Err(serde::de::Error::custom("expected `false`")) + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ErrorPayload { + pub message: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub info: Option, +} + +impl Response { + pub fn ok(id: u64, result: serde_json::Value) -> Self { + Self { + id, + result: ResponseResult::Ok { + ok: OkMarker::TRUE, + result, + }, + } + } + + pub fn err(id: u64, error: ErrorPayload) -> Self { + Self { + id, + result: ResponseResult::Err { + ok: ErrMarker::FALSE, + error, + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip_eval_request() { + let req = Request { + id: 1, + op: RequestOp::Eval { + tcl: "puts hi".into(), + }, + }; + let s = serde_json::to_string(&req).unwrap(); + assert!(s.contains("\"op\":\"eval\"")); + assert!(s.contains("\"tcl\":\"puts hi\"")); + let back: Request = serde_json::from_str(&s).unwrap(); + match back.op { + RequestOp::Eval { tcl } => assert_eq!(tcl, "puts hi"), + _ => panic!(), + } + } + + #[test] + fn round_trip_ok_response() { + let r = Response::ok(7, serde_json::json!("hi")); 
+ let s = serde_json::to_string(&r).unwrap(); + let back: Response = serde_json::from_str(&s).unwrap(); + match back.result { + ResponseResult::Ok { result, .. } => { + assert_eq!(result, serde_json::json!("hi")) + } + _ => panic!(), + } + } + + #[test] + fn round_trip_err_response() { + let r = Response::err( + 8, + ErrorPayload { + message: "boom".into(), + code: Some("E1".into()), + info: None, + }, + ); + let s = serde_json::to_string(&r).unwrap(); + let back: Response = serde_json::from_str(&s).unwrap(); + match back.result { + ResponseResult::Err { error, .. } => { + assert_eq!(error.message, "boom"); + assert_eq!(error.code.as_deref(), Some("E1")); + } + _ => panic!(), + } + } +} diff --git a/vw-htcl-cmd/Cargo.toml b/vw-htcl-cmd/Cargo.toml new file mode 100644 index 0000000..ad3a891 --- /dev/null +++ b/vw-htcl-cmd/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "vw-htcl-cmd" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "Generate documented htcl command wrappers from Vivado man-page references" + +[dependencies] +vw-htcl = { path = "../vw-htcl" } +serde.workspace = true +thiserror.workspace = true +toml.workspace = true +winnow.workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/vw-htcl-cmd/src/constraints.rs b/vw-htcl-cmd/src/constraints.rs new file mode 100644 index 0000000..a683422 --- /dev/null +++ b/vw-htcl-cmd/src/constraints.rs @@ -0,0 +1,212 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Per-command signature augmentations layered on top of the +//! man-page-derived wrapper. +//! +//! UG835 gives us each command's flag/positional list and types, but +//! it has no language for the semantic refinements an htcl wrapper +//! 
benefits from — mutually-exclusive call modes (`set_property`'s +//! `-dict` vs `-name/-value/-objects` pair), inter-argument +//! requirements (`tuser_width @requires has_tuser`), reclassifying +//! a positional into a keyword-form arg with a default. Those live +//! in a TOML file the wrapper-module author hand-maintains alongside +//! the auto-generated `cmd/*.htcl` files. +//! +//! File shape: +//! +//! ```toml +//! [.args.] +//! default = "..." # adds/replaces @default(...) +//! enum = ["a", "b"] # adds/replaces @enum(a, b) +//! clear_enum = true # drops any @enum the man-page emitted +//! one_of = ["other"] # adds @one_of(other) +//! requires = ["a", "b"] # adds @requires(a, b) +//! conflicts = ["a"] # adds @conflicts(a) +//! ``` +//! +//! The generator applies overrides during signature emission. The +//! body emission then follows the post-override arg classification +//! — flipping a flag from `@enum(0, 1)` to `@default("")` makes it +//! a value-taking arg, and the body forwards `-flag $value` +//! instead of `if {$flag} { lappend cmd -flag }`. + +use std::collections::HashMap; +use std::path::Path; + +use serde::Deserialize; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ConstraintsError { + #[error("reading {path}: {source}")] + Io { + path: std::path::PathBuf, + #[source] + source: std::io::Error, + }, + #[error("parsing {path}: {source}")] + Parse { + path: std::path::PathBuf, + #[source] + source: toml::de::Error, + }, +} + +/// Per-arg overrides for one command. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] +pub struct ArgOverride { + /// New `@default(...)` value. Replaces any inherited default. + #[serde(default)] + pub default: Option, + /// New `@enum(...)` choices. Replaces any inherited enum. + #[serde(default, rename = "enum")] + pub enum_: Option>, + /// Drop any inherited `@enum`. Use when the man-page parsing + /// modeled an arg as `@enum(0, 1)` (boolean toggle) but it's + /// actually value-taking. 
+ #[serde(default)] + pub clear_enum: bool, + /// `@one_of(...)` declarations to add. Empty means no addition. + #[serde(default)] + pub one_of: Vec, + /// `@requires(...)` declarations to add. + #[serde(default)] + pub requires: Vec, + /// `@conflicts(...)` declarations to add. + #[serde(default)] + pub conflicts: Vec, + /// Override whether this arg carries a Vivado typed Tcl_Obj + /// handle (e.g. a bd_cell, get_bd_pins result). `None` keeps + /// the generator default (a name-based allowlist of well-known + /// typed-arg names like `objects`/`cells`/`pin`/...). `Some(true)` + /// forces this arg to be treated as typed; `Some(false)` forces + /// it to be treated as a plain string. + /// + /// Typed args are passed directly via `-flag $value` in the + /// wrapper body — never through `[list]` or `lappend` — because + /// list-construction shimmers Vivado's internal typed Tcl_Obj to + /// a plain string, and downstream consumers like + /// `set_property -objects` reject the stringified path. + #[serde(default)] + pub typed: Option, + + /// Per-arg type annotation: any valid htcl type expression + /// (`bd_cell`, `list`, `string`, etc.). Wins over + /// the inferred type from the typed-handle name table when + /// both are present. Set when an arg's name doesn't match + /// the table's plural-aware heuristic, or when the man-page + /// `object` placeholder is actually a specific type. + #[serde(default, rename = "type")] + pub arg_type: Option, +} + +/// All overrides for one command, indexed by arg ident. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] +pub struct CommandOverride { + /// Per-arg overrides. The key is the htcl proc-arg identifier + /// (matches `Argument::ident`). + #[serde(default)] + pub args: HashMap, + /// Override the command's return type. 
The string is taken + /// verbatim and emitted as the proc's 4th-word annotation + /// (`proc NAME { args } { body }`), so any valid + /// htcl type expression works: `bd_cell`, `list`, + /// `dict`, `unit`. Use this when the Returns: + /// phrase auto-mapping is ambiguous or wrong for a specific + /// command. + #[serde(default)] + pub returns: Option, +} + +/// The complete set of overrides loaded from the constraints file. +/// Lookups are by command name (`set_property`, `create_project`, +/// …) — missing entries return `None` and the generator emits the +/// pure man-page-derived wrapper. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)] +#[serde(transparent)] +pub struct ConstraintsTable { + commands: HashMap, +} + +impl ConstraintsTable { + /// Empty table — every command falls back to the pure man-page + /// signature. Used when no `--constraints` was passed. + pub fn empty() -> Self { + Self::default() + } + + /// Load from a TOML file at `path`. + pub fn load(path: &Path) -> Result { + let text = std::fs::read_to_string(path).map_err(|e| { + ConstraintsError::Io { + path: path.to_path_buf(), + source: e, + } + })?; + toml::from_str(&text).map_err(|e| ConstraintsError::Parse { + path: path.to_path_buf(), + source: e, + }) + } + + /// Per-command overrides, or `None` when nothing is declared + /// for `command`. 
+ pub fn for_command(&self, command: &str) -> Option<&CommandOverride> { + self.commands.get(command) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_table_returns_no_overrides() { + let t = ConstraintsTable::empty(); + assert!(t.for_command("set_property").is_none()); + } + + #[test] + fn parses_full_arg_override_block() { + let toml = r#" + [set_property.args.dict] + default = "" + clear_enum = true + one_of = ["name"] + requires = ["objects"] + + [set_property.args.name] + default = "" + one_of = ["dict"] + requires = ["value", "objects"] + "#; + let t: ConstraintsTable = toml::from_str(toml).unwrap(); + let sp = t.for_command("set_property").unwrap(); + let dict = sp.args.get("dict").unwrap(); + assert_eq!(dict.default.as_deref(), Some("")); + assert!(dict.clear_enum); + assert_eq!(dict.one_of, vec!["name".to_string()]); + assert_eq!(dict.requires, vec!["objects".to_string()]); + + let name = sp.args.get("name").unwrap(); + assert_eq!(name.default.as_deref(), Some("")); + assert_eq!(name.one_of, vec!["dict".to_string()]); + assert_eq!( + name.requires, + vec!["value".to_string(), "objects".to_string()] + ); + } + + #[test] + fn missing_command_returns_none() { + let toml = r#" + [set_property.args.dict] + default = "" + "#; + let t: ConstraintsTable = toml::from_str(toml).unwrap(); + assert!(t.for_command("create_project").is_none()); + assert!(t.for_command("set_property").is_some()); + } +} diff --git a/vw-htcl-cmd/src/generate.rs b/vw-htcl-cmd/src/generate.rs new file mode 100644 index 0000000..3c09f43 --- /dev/null +++ b/vw-htcl-cmd/src/generate.rs @@ -0,0 +1,790 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Emit an htcl wrapper proc for a parsed [`ManPage`]. +//! +//! Shape, for a command `add_files`: +//! +//! ```htcl +//! 
# Preserve the underlying Vivado command so the wrapper can forward +//! # to it after shadowing the global name. +//! if {[info commands __viv_add_files] eq "" && [info commands add_files] ne ""} { +//! rename add_files __viv_add_files +//! } +//! +//! ## Adds one or more source files ... +//! proc add_files { +//! ## (Optional) The fileset to add to. +//! @default("") fileset +//! ## (Optional) Do not recurse ... +//! @enum(0, 1) @default(0) norecurse +//! ## Positional operands ... +//! @default("") operands +//! } { +//! set cmd [list __viv_add_files] +//! if {$fileset ne ""} { lappend cmd -fileset $fileset } +//! if {$norecurse} { lappend cmd -norecurse } +//! if {$operands ne ""} { lappend cmd {*}$operands } +//! return [{*}$cmd] +//! } +//! ``` +//! +//! The wrapper keeps the command's natural name and shadows the +//! builtin; a guarded `rename` stashes the original under +//! `` so the body forwards to it without recursing. All +//! arguments are addressed by keyword (`-fileset value`); boolean flags +//! take a `0`/`1` value at the htcl layer and lower to flag +//! presence/absence on the Vivado command line. + +use std::fmt::Write; + +use vw_htcl::emit::{Command, Doc, Item, Word}; + +use crate::constraints::{ArgOverride, ConstraintsTable}; +use crate::model::{ArgKind, Argument, ManPage}; + +/// Arg names whose values are Vivado typed `Tcl_Obj` handles — +/// `bd_cell`, `bd_pin`, etc. — and therefore must be passed to the +/// underlying command **directly** (`-flag $value`) rather than +/// through `[list]` or `lappend`. List construction shimmers Tcl's +/// internal typed representation away, leaving the handle as a +/// plain path string; downstream code paths in Vivado (notably +/// `set_property -objects`) reject the stringified path with +/// `[Common 17-161] Invalid option value`. +/// +/// Curated list of the obvious cases. Per-arg override via +/// `cmd-constraints.toml`'s `typed = true|false` covers the long +/// tail. 
+const TYPED_ARG_NAMES: &[&str] = &[ + "object", + "objects", + "of_objects", + "cell", + "cells", + "pin", + "pins", + "port", + "ports", + "intf_pin", + "intf_pins", + "intf_port", + "intf_ports", + "net", + "nets", + "intf_net", + "intf_nets", +]; + +/// Whether the arg called `name` is treated as a typed handle: an +/// explicit `override_` wins; otherwise membership in +/// [`TYPED_ARG_NAMES`] decides. +fn is_typed_arg(name: &str, override_: Option) -> bool { + match override_ { + Some(t) => t, + None => TYPED_ARG_NAMES.contains(&name), + } +} + +/// Map a typed-arg name to its concrete `TypeExpr` text, when +/// known. Drives the `name: TYPE` annotation emitted in the +/// generated proc args. Plural names (`cells`, `pins`) map to +/// `list`; singulars (`cell`, `pin`) map to the bd_* type +/// directly. Generic catch-alls (`object`, `objects`, +/// `of_objects`) can refer to any Vivado handle class, so we +/// leave them untyped at this layer — the type system doesn't +/// have unions in v1. +/// +/// Returning `None` means "the arg is typed (don't list-wrap in +/// the body) but we don't have a precise type expression for +/// it" — the generator emits the arg without an annotation. +fn typed_arg_type(name: &str) -> Option<&'static str> { + match name { + "cell" => Some("bd_cell"), + "cells" => Some("list"), + "pin" => Some("bd_pin"), + "pins" => Some("list"), + "port" => Some("bd_port"), + "ports" => Some("list"), + "net" => Some("bd_net"), + "nets" => Some("list"), + "intf_pin" => Some("bd_intf_pin"), + "intf_pins" => Some("list"), + "intf_port" => Some("bd_intf_port"), + "intf_ports" => Some("list"), + "intf_net" => Some("bd_intf_net"), + "intf_nets" => Some("list"), + // object / objects / of_objects: any handle class — no + // precise type until we have unions. + _ => None, + } +} + +/// Options controlling [`generate`]. +#[derive(Clone, Debug)] +pub struct GenerateOptions { + /// Prefix for the stashed original command (`rename add_files + /// __viv_add_files`). Kept for backwards compatibility — the + /// lowering pass now generates the rename plumbing, so this + /// field has no effect.
+ pub rename_prefix: String, + /// Emit each command's `See Also` list as a doc-comment footer. + pub include_see_also: bool, + /// Per-command signature augmentations loaded from + /// `cmd-constraints.toml`. The generator merges these onto the + /// man-page-derived shape so wrapper authors can declare + /// mutually-exclusive call modes, value-taking flags + /// misclassified by the man page, etc., without hand-editing + /// the generated files. + pub constraints: ConstraintsTable, +} + +/// Defaults: `__viv_` rename prefix, See-Also footer enabled, and an +/// empty constraints table. +impl Default for GenerateOptions { + fn default() -> Self { + Self { + rename_prefix: "__viv_".to_string(), + include_see_also: true, + constraints: ConstraintsTable::empty(), + } + } +} + +/// Generate the htcl wrapper text for `page`. +/// +/// The output is a generated-file header followed by a +/// `namespace eval vivado_cmd { ... }` block holding the documented +/// proc. Trailing whitespace is stripped from every line and the text +/// ends with a single newline. +pub fn generate(page: &ManPage, opts: &GenerateOptions) -> String { + let cmd = &page.name; + // Wrapper body forwards to the underlying Vivado proc via + // `extern::` (which the lowering rewrites to the bare native + // name). The wrapper itself lives inside `namespace eval + // vivado_cmd { ... }` so it doesn't shadow the global name the + // body is forwarding to — that's what frees Vivado's own + // internal Tcl from accidentally hitting our typed wrappers + // when it calls a sibling builtin. + let forwarded = format!("extern::{}", page.name); + + let overrides = opts.constraints.for_command(&page.name); + let effective = effective_args(page, overrides); + + let mut out = String::new(); + writeln!( + out, + "# Generated by `vw htcl-cmd generate` from the Vivado command \ + reference." + ) + .unwrap(); + writeln!(out, "# Do not edit by hand.").unwrap(); + writeln!(out).unwrap(); + + // Wrappers live in `vivado_cmd::`, NOT `vivado::`. Vivado has + // its own internal `::vivado` namespace and code paths that + // behave differently depending on the calling namespace — + // notably `set_property -dict -objects ...` rejects valid cell + // handles when invoked from inside `::vivado`.
Picking a name + // Vivado doesn't use means our wrapper bodies never collide + // with Vivado-internal namespace state. + writeln!(out, "namespace eval vivado_cmd {{").unwrap(); + writeln!(out).unwrap(); + + // Proc doc comment: the command Description, then a See-Also footer. + emit_proc_doc(&mut out, page, opts); + + // Proc args (structured) and body (compact Tcl). + let args = build_args(page, &effective); + let body = build_body(&forwarded, &effective); + // Resolve return type. Priority: + // 1. Explicit override in `cmd-constraints.toml`. + // 2. The page's `Returns:` section, if present. + // 3. Phrases in the `Description:` section — Vivado very + // rarely uses a dedicated Returns: header, so this is + // actually the common path. The phrase table is the same + // either way. + let return_type = overrides + .and_then(|o| o.returns.as_deref()) + .map(String::from) + .or_else(|| infer_return_type(page.returns.as_deref())) + .or_else(|| infer_return_type(Some(page.description.as_slice()))); + emit_proc(&mut out, cmd, &args, return_type.as_deref(), &body); + + writeln!(out).unwrap(); + writeln!(out, "}}").unwrap(); + + // Trim trailing whitespace line-by-line (empty doc comments emit a + // trailing space) and guarantee a single trailing newline. + let mut cleaned: String = out + .lines() + .map(str::trim_end) + .collect::>() + .join("\n"); + cleaned.push('\n'); + cleaned +} + +/// Write the proc-level doc comments above the `proc` line so they +/// attach to it. The output is structured as +/// +/// ```text +/// ## +/// ## +/// ## +/// ## +/// ## +/// ``` +/// +/// where the summary is the first sentence of the source description +/// (LSP-clients use it for inline annotations like +/// `CompletionItem::detail`) and the body is everything after, +/// rendered as separate paragraphs. The body paragraphs are +/// re-wrapped at ~78 columns so the on-disk file stays readable +/// without preserving the man-page's source wrap.
+fn emit_proc_doc(out: &mut String, page: &ManPage, opts: &GenerateOptions) { + let raw: Vec = + page.description.iter().map(|l| sanitize_doc(l)).collect(); + let summary = vw_htcl::doc::brief(&raw); + let extended = vw_htcl::doc::extended(&raw); + + match summary { + None => { + writeln!(out, "## Wrapper for the Vivado `{}` command.", page.name) + .unwrap(); + } + Some(s) => { + emit_paragraph_lines(out, &s, "## ", 78); + } + } + if let Some(body) = extended { + for paragraph in body.split("\n\n") { + writeln!(out, "##").unwrap(); + emit_paragraph_lines(out, paragraph, "## ", 78); + } + } + + if opts.include_see_also && !page.see_also.is_empty() { + writeln!(out, "##").unwrap(); + writeln!(out, "## See also: {}", page.see_also.join(", ")).unwrap(); + } +} + +fn emit_paragraph_lines( + out: &mut String, + text: &str, + prefix: &str, + width: usize, +) { + let body_width = width.saturating_sub(prefix.len()); + for line in vw_htcl::doc::wrap_paragraph(text, body_width) { + writeln!(out, "{prefix}{line}").unwrap(); + } +} + +/// One argument plus whatever overrides from `cmd-constraints.toml` +/// apply to it. `default`, `enum_values`, `one_of`, `requires`, +/// `conflicts` are the *final* values the wrapper should emit; +/// constraint resolution has already happened. +/// +/// `kind` is derived: a constraint that clears the enum and adds a +/// default to a man-page-Boolean arg flips it to value-taking, so +/// the body-emitter forwards `-flag $value` instead of `if {$f} { +/// lappend cmd -flag }`. +#[derive(Clone, Debug)] +struct EffectiveArg { + ident: String, + flag: Option, + kind: ArgKind, + /// `None` → no default (required); `Some(text)` → emit `@default(text)`. + default: Option, + /// `None` → no enum; `Some(vec)` → emit `@enum(...)`. 
+ enum_values: Option>, + one_of: Vec, + requires: Vec, + conflicts: Vec, + description: Vec, + /// True when this arg carries a Vivado typed `Tcl_Obj` handle + /// — body emission passes it directly (`-flag $value`) rather + /// than threading it through a list. See [`TYPED_ARG_NAMES`]. + typed: bool, + /// The arg's declared type expression, if known. Emitted in + /// the proc args as `name: TYPE`. Set from + /// [`typed_arg_type`] for the typed-handle allowlist; an + /// explicit per-arg `type = "..."` override in + /// `cmd-constraints.toml` wins over the inferred value. + arg_type: Option, +} + +fn effective_args( + page: &ManPage, + overrides: Option<&crate::constraints::CommandOverride>, +) -> Vec { + page.arguments + .iter() + .map(|arg| { + effective_arg(arg, overrides.and_then(|o| o.args.get(&arg.ident))) + }) + .collect() +} + +fn effective_arg(arg: &Argument, over: Option<&ArgOverride>) -> EffectiveArg { + let empty = ArgOverride::default(); + let over = over.unwrap_or(&empty); + + // Default value: explicit override wins; else man-page heuristic. + let mut default: Option = match &arg.kind { + ArgKind::Boolean => Some("0".to_string()), + ArgKind::Value | ArgKind::Positional => { + (!arg.required).then(|| "".to_string()) + } + }; + if let Some(d) = over.default.as_deref() { + default = Some(d.to_string()); + } + + // Enum: man-page-derived for booleans; constraints can clear or + // replace. + let mut enum_values: Option> = match &arg.kind { + ArgKind::Boolean => Some(vec!["0".to_string(), "1".to_string()]), + _ => None, + }; + if over.clear_enum { + enum_values = None; + } + if let Some(v) = &over.enum_ { + enum_values = Some(v.clone()); + } + + // Kind: an arg with no `@enum` (cleared) and a string-typed + // default acts like a value-taking flag — body-emit should + // forward `-flag $value`, not `if {$flag} { ... }`. This is the + // exact shape `set_property -dict` needs. 
+ let kind = if matches!(arg.kind, ArgKind::Boolean) && enum_values.is_none() + { + if arg.flag.is_some() { + ArgKind::Value + } else { + ArgKind::Positional + } + } else { + arg.kind + }; + + let typed = is_typed_arg(&arg.ident, over.typed); + let arg_type = over + .arg_type + .clone() + .or_else(|| typed_arg_type(&arg.ident).map(String::from)); + + EffectiveArg { + ident: arg.ident.clone(), + flag: arg.flag.clone(), + kind, + default, + enum_values, + one_of: over.one_of.clone(), + requires: over.requires.clone(), + conflicts: over.conflicts.clone(), + description: arg.description.clone(), + typed, + arg_type, + } +} + +/// Build the structured arg list as an emit [`Doc`]: per-argument doc +/// comments followed by an `@attr… ident` declaration. The doc +/// comments follow the same `summary, blank, body` shape the +/// proc-level docs use, so LSP clients can split brief/detail from +/// extended documentation consistently. +fn build_args(_page: &ManPage, effective: &[EffectiveArg]) -> Doc { + let mut doc = Doc::new(); + for (i, arg) in effective.iter().enumerate() { + if i > 0 { + doc.push(Item::Blank); + } + let raw: Vec = + arg.description.iter().map(|l| sanitize_doc(l)).collect(); + let summary = vw_htcl::doc::brief(&raw); + let extended = vw_htcl::doc::extended(&raw); + + let body_width = 76usize; + if let Some(s) = summary.as_deref() { + for line in vw_htcl::doc::wrap_paragraph(s, body_width) { + doc.push(Item::DocComment(line)); + } + } + if let Some(body) = extended { + for paragraph in body.split("\n\n") { + doc.push(Item::DocComment(String::new())); + for line in vw_htcl::doc::wrap_paragraph(paragraph, body_width) + { + doc.push(Item::DocComment(line)); + } + } + } + + doc.push(Item::Command(Command { + doc_comments: Vec::new(), + words: effective_attr_words(arg), + body: None, + })); + } + doc +} + +/// The attribute words + identifier for one effective argument. 
+fn effective_attr_words(arg: &EffectiveArg) -> Vec { + let mut words = Vec::new(); + if let Some(values) = &arg.enum_values { + let inner = values + .iter() + .map(|v| format_attribute_value(v)) + .collect::>() + .join(", "); + words.push(Word::Raw(format!("@enum({inner})"))); + } + if let Some(default) = &arg.default { + words.push(Word::Raw(format!( + "@default({})", + format_attribute_value(default) + ))); + } + if !arg.one_of.is_empty() { + words.push(Word::Raw(format!("@one_of({})", arg.one_of.join(", ")))); + } + if !arg.requires.is_empty() { + words + .push(Word::Raw(format!("@requires({})", arg.requires.join(", ")))); + } + if !arg.conflicts.is_empty() { + words.push(Word::Raw(format!( + "@conflicts({})", + arg.conflicts.join(", ") + ))); + } + match arg.arg_type.as_deref() { + Some(ty) => { + // Emit `name: TYPE` as two adjacent bare words. The + // proc-args parser tokenizes `name`, `:`, and TYPE + // independently — the layout reads as the user would + // write it. + words.push(Word::Bare(format!("{}:", arg.ident))); + words.push(Word::Bare(ty.to_string())); + } + None => { + words.push(Word::Bare(arg.ident.clone())); + } + } + words +} + +fn format_attribute_value(s: &str) -> String { + let is_int = !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit()); + let is_ident = !s.is_empty() + && s.bytes().enumerate().all(|(i, b)| { + if i == 0 { + b.is_ascii_alphabetic() || b == b'_' + } else { + b.is_ascii_alphanumeric() || b == b'_' + } + }); + if is_int || is_ident { + s.to_string() + } else { + format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) + } +} + +/// Build the proc body. +/// +/// Args are split into two cohorts: +/// +/// - **Non-typed args** (booleans, strings, positionals whose name +/// isn't in the typed-handle allowlist) accumulate into a `flags` +/// list via `lappend`. 
Each arg's kind drives its emit form — +/// `Boolean` → `if {$x} { lappend flags -flag }`, `Value` → +/// `if {$x ne ""} { lappend flags -flag $x }`, `Positional` → +/// `if {$x ne ""} { lappend flags {*}$x }`. These values are all +/// strings, so the lappend / `{*}`-expansion that follows is +/// safe — string values don't have a typed Tcl_Obj to shimmer. +/// - **Typed-handle args** (`-objects`/`-cell`/etc., per +/// [`TYPED_ARG_NAMES`] or per-arg `typed = true` override) are +/// passed **directly** to the underlying command via +/// `-flag $value`. Putting them through `[list]` or `lappend` +/// would shimmer Vivado's internal typed Tcl_Obj to a string, +/// and downstream code paths like `set_property -objects` reject +/// the stringified path with `[Common 17-161] Invalid option +/// value '...' specified for 'objects'`. +/// +/// The invocation site branches on which typed args are present so +/// no typed-arg flag appears in the call when its variable is +/// empty. With N typed args this is 2^N branches; in practice N is +/// 0 or 1 for almost every Vivado command, and never more than 2. +fn build_body(orig: &str, effective: &[EffectiveArg]) -> String { + let mut body = String::new(); + + let non_typed: Vec<&EffectiveArg> = + effective.iter().filter(|a| !a.typed).collect(); + let typed: Vec<&EffectiveArg> = + effective.iter().filter(|a| a.typed).collect(); + + // Non-typed accumulator. `flags` is a plain Tcl list — only + // ever contains string values, so list-construction shimmer is + // a non-issue. 
+ writeln!(body, "set flags [list]").unwrap(); + for arg in &non_typed { + let id = &arg.ident; + let required = arg.default.is_none(); + match arg.kind { + ArgKind::Boolean => { + let flag = arg.flag.as_deref().unwrap_or(id); + writeln!(body, "if {{${id}}} {{ lappend flags -{flag} }}") + .unwrap(); + } + ArgKind::Value => { + let flag = arg.flag.as_deref().unwrap_or(id); + if required { + writeln!(body, "lappend flags -{flag} ${id}").unwrap(); + } else { + writeln!( + body, + "if {{${id} ne \"\"}} {{ lappend flags -{flag} ${id} }}" + ) + .unwrap(); + } + } + ArgKind::Positional => { + if required { + writeln!(body, "lappend flags {{*}}${id}").unwrap(); + } else { + writeln!( + body, + "if {{${id} ne \"\"}} \ + {{ lappend flags {{*}}${id} }}" + ) + .unwrap(); + } + } + } + } + + // Typed-arg branching. Direct invocation per combination of + // typed args that are non-empty, so the typed values never + // touch a Tcl list. + emit_typed_invocation(&mut body, orig, &typed, 0); + + body +} + +/// Emit the typed-arg branch tree. At each level we split on +/// "this typed arg present?" and recurse; at the leaves we emit +/// the actual `return [extern:: {*}$flags ...]` with whatever +/// subset of typed args was present. +fn emit_typed_invocation( + body: &mut String, + orig: &str, + typed: &[&EffectiveArg], + depth: usize, +) { + let indent = " ".repeat(depth); + if typed.is_empty() { + writeln!(body, "{indent}return [{orig} {{*}}$flags]").unwrap(); + return; + } + if let Some((first, rest)) = typed.split_first() { + // Required typed args have no `ne ""` guard — they're + // always passed. Optional typed args branch on presence. 
+ let id = &first.ident; + let flag = first.flag.as_deref().unwrap_or(id); + let required = first.default.is_none(); + if required { + emit_typed_invocation_with( + body, + orig, + rest, + &[(*first, flag)], + depth, + ); + } else { + writeln!(body, "{indent}if {{${id} ne \"\"}} {{").unwrap(); + emit_typed_invocation_with( + body, + orig, + rest, + &[(*first, flag)], + depth + 1, + ); + writeln!(body, "{indent}}} else {{").unwrap(); + emit_typed_invocation(body, orig, rest, depth + 1); + writeln!(body, "{indent}}}").unwrap(); + } + } +} + +/// Inner: we've decided to include `included` typed args; the +/// remaining `rest` still need branching. At the leaf we emit a +/// `return` with `{*}$flags` and each included typed arg as +/// `-flag $var`. +fn emit_typed_invocation_with( + body: &mut String, + orig: &str, + rest: &[&EffectiveArg], + included: &[(&EffectiveArg, &str)], + depth: usize, +) { + let indent = " ".repeat(depth); + if rest.is_empty() { + let mut line = format!("{indent}return [{orig} {{*}}$flags"); + for (arg, flag) in included { + match arg.kind { + ArgKind::Positional => { + write!(line, " ${id}", id = arg.ident).unwrap(); + } + _ => { + write!(line, " -{flag} ${id}", id = arg.ident).unwrap(); + } + } + } + line.push(']'); + writeln!(body, "{line}").unwrap(); + return; + } + if let Some((first, more)) = rest.split_first() { + let id = &first.ident; + let flag = first.flag.as_deref().unwrap_or(id); + let required = first.default.is_none(); + if required { + let mut new_included = included.to_vec(); + new_included.push((*first, flag)); + emit_typed_invocation_with(body, orig, more, &new_included, depth); + } else { + writeln!(body, "{indent}if {{${id} ne \"\"}} {{").unwrap(); + let mut new_included = included.to_vec(); + new_included.push((*first, flag)); + emit_typed_invocation_with( + body, + orig, + more, + &new_included, + depth + 1, + ); + writeln!(body, "{indent}}} else {{").unwrap(); + emit_typed_invocation_with(body, orig, more, included, 
depth + 1); + writeln!(body, "{indent}}}").unwrap(); + } + } +} + +/// Emit `proc { } ? { }` with args and +/// body each indented two spaces. When `return_type` is Some, emits +/// it as the 4th htcl word between the args block and the body — +/// brace-wrapping if the type expression contains whitespace so it +/// parses as a single word. +fn emit_proc( + out: &mut String, + name: &str, + args: &Doc, + return_type: Option<&str>, + body: &str, +) { + let args_text = args.to_string(); + writeln!(out, "proc {name} {{").unwrap(); + for line in args_text.lines() { + if line.is_empty() { + writeln!(out).unwrap(); + } else { + writeln!(out, " {line}").unwrap(); + } + } + match return_type { + Some(ty) => { + // Wrap with `{ … }` if the type expression contains + // whitespace (the htcl parser would otherwise see + // multiple words). + let needs_brace = ty.chars().any(char::is_whitespace); + if needs_brace { + writeln!(out, "}} {{{ty}}} {{").unwrap(); + } else { + writeln!(out, "}} {ty} {{").unwrap(); + } + } + None => { + writeln!(out, "}} {{").unwrap(); + } + } + for line in body.lines() { + if line.is_empty() { + writeln!(out).unwrap(); + } else { + writeln!(out, " {line}").unwrap(); + } + } + writeln!(out, "}}").unwrap(); +} + +/// Infer a return-type annotation from the Vivado man-page's +/// `Returns:` prose. The phrase-table is intentionally small — +/// matches the recurring shapes Vivado uses across hundreds of +/// commands. Unmatched phrasings return `None`; the +/// `cmd-constraints.toml` `returns = "…"` override picks up +/// whatever doesn't match. +/// +/// Matched on the joined, lowercased text — Vivado's prose is +/// short (usually one or two lines) so we don't need a real NLP +/// pipeline. +fn infer_return_type(returns: Option<&[String]>) -> Option { + let lines = returns?; + let joined = lines.join(" ").to_ascii_lowercase(); + let text = joined.trim(); + if text.is_empty() { + return None; + } + // Order matters: more-specific phrases first. 
Each entry is + // (substring needle, type). A real future implementation + // could swap in regex; substring search is good enough for + // the v1 phrase set. + let table: &[(&str, &str)] = &[ + // "nothing" / "Tcl_OK on success" — side-effecting commands. + ("returns nothing", "unit"), + ("nothing", "unit"), + // Lists of typed handles. + ("a list of cells", "list"), + ("a list of bd_cells", "list"), + ("list of cell objects", "list"), + ("a list of pins", "list"), + ("a list of bd_pins", "list"), + ("a list of intf_pins", "list"), + ("a list of interface pins", "list"), + ("a list of intf_ports", "list"), + ("a list of interface ports", "list"), + ("a list of ports", "list"), + ("a list of nets", "list"), + ("a list of intf_nets", "list"), + ("a list of interface nets", "list"), + // Singular handles. + ("the cell created", "bd_cell"), + ("the new cell", "bd_cell"), + ("the pin created", "bd_pin"), + ("the port created", "bd_port"), + ("the net created", "bd_net"), + // Property values. + ("the property value", "string"), + ("the value of the property", "string"), + ("a list of properties", "list"), + // Generic strings (catch-all when prose says "string" + // explicitly). + ("returns a string", "string"), + ]; + for (needle, ty) in table { + if text.contains(needle) { + return Some((*ty).into()); + } + } + None +} + +/// Make doc-comment text safe to embed inside the proc arg-list braces. +/// +/// The htcl parser captures a proc's arg list as a braced word and +/// brace-matches it raw (only `{`, `}`, `\` are special); a per-arg +/// `##` doc comment with an unbalanced brace or a stray backslash would +/// corrupt that match. Neutralize the three offenders — braces become +/// parentheses, backslashes become slashes — which keeps the prose +/// legible while guaranteeing the generated wrapper parses. 
+fn sanitize_doc(s: &str) -> String { + s.replace('\\', "/") + .replace('{', "(") + .replace('}', ")") + .trim_end() + .to_string() +} diff --git a/vw-htcl-cmd/src/lib.rs b/vw-htcl-cmd/src/lib.rs new file mode 100644 index 0000000..d90c42c --- /dev/null +++ b/vw-htcl-cmd/src/lib.rs @@ -0,0 +1,77 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Vivado command reference → htcl wrapper generation. +//! +//! The dual of [`vw_ip`]: where that crate turns an IP-XACT component +//! into a configuration-interface proc, this one turns a Vivado Tcl +//! command's plain-text reference page (under +//! `/doc/eng/man`) into a documented, typed htcl wrapper for +//! that command. +//! +//! Each generated wrapper keeps the command's natural name and shadows +//! the Vivado builtin, forwarding to a `rename`-stashed copy of the +//! original. The payoff is the htcl surface: hover documentation drawn +//! from the man page, `@enum`/`@default` validation on flags, and +//! keyword call sites the analyzer can check — all on the real command +//! names. +//! +//! ```no_run +//! let page = vw_htcl_cmd::load("/opt/Vivado/doc/eng/man/add_files", None)?; +//! let htcl = vw_htcl_cmd::generate(&page, &Default::default()); +//! print!("{htcl}"); +//! # Ok::<(), vw_htcl_cmd::Error>(()) +//! 
``` + +pub mod constraints; +pub mod generate; +pub mod model; +pub mod parse; + +pub use constraints::{ + ArgOverride, CommandOverride, ConstraintsError, ConstraintsTable, +}; +pub use generate::{generate, GenerateOptions}; +pub use model::{ArgKind, Argument, ManPage}; +pub use parse::parse_man_page; + +use std::path::Path; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("reading man page `{path}`: {source}")] + Io { + path: String, + #[source] + source: std::io::Error, + }, + #[error("cannot derive a command name from `{0}` (no file stem)")] + NoName(String), +} + +pub type Result = std::result::Result; + +/// Load and parse a man page from disk. +/// +/// The command name comes from `name_override` when given, otherwise +/// from the file stem (`.../man/add_files` → `add_files`). +pub fn load( + path: impl AsRef, + name_override: Option<&str>, +) -> Result { + let path = path.as_ref(); + let text = std::fs::read_to_string(path).map_err(|source| Error::Io { + path: path.display().to_string(), + source, + })?; + let name = match name_override { + Some(n) => n.to_string(), + None => path + .file_stem() + .and_then(|s| s.to_str()) + .ok_or_else(|| Error::NoName(path.display().to_string()))? + .to_string(), + }; + Ok(parse_man_page(&name, &text)) +} diff --git a/vw-htcl-cmd/src/model.rs b/vw-htcl-cmd/src/model.rs new file mode 100644 index 0000000..4034f67 --- /dev/null +++ b/vw-htcl-cmd/src/model.rs @@ -0,0 +1,102 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! The structured model of a Vivado command reference ("man page"). +//! +//! Vivado ships a plain-text reference page per Tcl command under +//! `doc/eng/man`. Each page follows a regular shape: +//! +//! ```text +//! Description: +//! +//! +//! +//! Arguments: +//! +//! -fileset - (Optional) +//! -norecurse - (Optional) +//! 
- (Required) +//! +//! Examples: +//! ... +//! +//! See Also: +//! +//! * import_files +//! * read_ip +//! ``` +//! +//! [`crate::parse`] turns that text into a [`ManPage`]; [`crate::generate`] +//! turns a [`ManPage`] into an htcl wrapper proc. + +/// A parsed Vivado command reference page. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ManPage { + /// The command name (e.g. `add_files`). Derived from the source + /// file stem, not the page body — the body never repeats it. + pub name: String, + /// The `Description:` section, de-indented, one entry per source + /// line. Empty lines are preserved as empty strings so paragraph + /// breaks survive into the emitted doc comment. + pub description: Vec, + /// The `Arguments:` section, one entry per documented flag or + /// positional operand, in declared order. + pub arguments: Vec, + /// Command names listed under `See Also:`. + pub see_also: Vec, + /// The raw `Returns:` prose, one entry per source line. Many + /// Vivado man pages don't include this section — it's `None` + /// in that case, and the generator emits the wrapper without + /// a return-type annotation (the REPL falls back to the + /// untyped heuristic for those). + pub returns: Option>, +} + +/// How an argument maps onto the underlying Vivado command line. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ArgKind { + /// A `-flag` with no value placeholder: a boolean toggle. Emitted + /// as `@enum(0, 1) @default(0)` and forwarded as a bare `-flag` + /// when set. + Boolean, + /// A `-flag `: forwarded as `-flag $value` when non-empty. + Value, + /// A trailing positional operand (``, ``, …): + /// forwarded by list-expansion (`{*}$operands`) at the end of the + /// command line. + Positional, +} + +/// One documented argument of a command. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Argument { + pub kind: ArgKind, + /// The htcl proc-arg identifier the caller uses as `-`. 
+ /// Equal to `flag` for flags; derived from the `` for + /// positionals. May be de-collided with a suffix. + pub ident: String, + /// The underlying Vivado flag name without its leading dash + /// (`fileset`, `norecurse`). `None` for positionals, which have no + /// flag on the command line. + pub flag: Option, + /// Whether the man page marked the argument `(Required)`. Required + /// arguments are emitted without an `@default`, so htcl forces the + /// caller to supply them. + pub required: bool, + /// Whether this is a generic operand placeholder synthesized by the + /// generator (the page documented no positional), rather than one + /// taken from the page text. + pub synthesized: bool, + /// The argument's prose description, de-indented, one entry per + /// source line (empty strings preserve paragraph breaks). + pub description: Vec, +} + +impl Argument { + /// `true` for flags (`-flag` / `-flag `), `false` for + /// positionals. + pub fn is_flag(&self) -> bool { + matches!(self.kind, ArgKind::Boolean | ArgKind::Value) + } +} diff --git a/vw-htcl-cmd/src/parse.rs b/vw-htcl-cmd/src/parse.rs new file mode 100644 index 0000000..a2639aa --- /dev/null +++ b/vw-htcl-cmd/src/parse.rs @@ -0,0 +1,554 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Parse a Vivado command reference page into a [`ManPage`]. +//! +//! Following the convention in [`vw_htcl::parser`], the outer loop is +//! hand-rolled — it owns line grouping and section recovery, which a +//! pure combinator grammar models awkwardly for free-form reference +//! text — while the structural inner pieces (an argument header's +//! `-flag - (marker)` shape, a `See Also` bullet) are +//! parsed with [`winnow`]. +//! +//! The grammar the inner parsers recognize, per argument block: +//! +//! ```text +//! flag-header := '-' ident placeholder? 
' - ' marker? prose +//! pos-header := '<' .. '>' '...'? ' - ' marker? prose +//! marker := '(' ('Optional' | 'Required') ')' +//! ``` +//! +//! A `placeholder` (anything between the flag name and the ` - ` +//! separator) makes a flag a *value* flag; its absence makes it a +//! *boolean* toggle. + +use winnow::ascii::space0; +use winnow::combinator::{opt, preceded}; +use winnow::token::take_while; +use winnow::ModalResult; +use winnow::Parser; + +use crate::model::{ArgKind, Argument, ManPage}; + +/// Parse `text` (the contents of one man page) into a [`ManPage`]. +/// `name` is the command name (the source file stem). +pub fn parse_man_page(name: &str, text: &str) -> ManPage { + let normalized = text.replace('\r', ""); + let sections = split_sections(&normalized); + + let description = section_lines(§ions, "Description") + .map(dedent_block) + .unwrap_or_default(); + + let arguments = section_lines(§ions, "Arguments") + .map(|lines| parse_arguments(&dedent_block(lines))) + .unwrap_or_default(); + + let see_also = section_lines(§ions, "See Also") + .or_else(|| section_lines(§ions, "See also")) + .map(parse_see_also) + .unwrap_or_default(); + + // The `Returns:` section is optional and usually one or two + // lines. Some pages spell it `Return Value` or `Return value` + // — accept both. + let returns = section_lines(§ions, "Returns") + .or_else(|| section_lines(§ions, "Return Value")) + .or_else(|| section_lines(§ions, "Return value")) + .map(dedent_block); + + let mut page = ManPage { + name: name.to_string(), + description, + arguments, + see_also, + returns, + }; + finalize_arguments(&mut page); + page +} + +// --------------------------------------------------------------------------- +// Sectioning (hand-rolled outer loop). +// --------------------------------------------------------------------------- + +/// A man page is a flat list of `Header:` sections. 
Returns each +/// section's title (without the trailing colon) paired with its raw +/// body lines, in document order. +fn split_sections(text: &str) -> Vec<(String, Vec)> { + let mut sections: Vec<(String, Vec)> = Vec::new(); + for line in text.lines() { + if let Some(title) = section_header(line) { + sections.push((title, Vec::new())); + } else if let Some((_, body)) = sections.last_mut() { + body.push(line.to_string()); + } + // Lines before the first header (a leading blank line, usually) + // are dropped. + } + sections +} + +/// Recognize a section header line — a capitalized label at column +/// zero ending in a colon, e.g. `Arguments:` or `See Also:`. Returns +/// the label without the colon. +fn section_header(line: &str) -> Option { + // Headers sit flush left; body text is indented. Cheap reject + // first. + if line.is_empty() || line.starts_with(' ') { + return None; + } + let stripped = line.strip_suffix(':')?; + if stripped.is_empty() + || !stripped + .chars() + .all(|c| c.is_ascii_alphabetic() || c == ' ') + || !stripped.starts_with(|c: char| c.is_ascii_uppercase()) + { + return None; + } + Some(stripped.to_string()) +} + +/// The body lines of the first section whose title equals `title`. +fn section_lines<'a>( + sections: &'a [(String, Vec)], + title: &str, +) -> Option<&'a [String]> { + sections + .iter() + .find(|(t, _)| t == title) + .map(|(_, body)| body.as_slice()) +} + +/// Strip the uniform two-space indent man-page bodies carry, leaving +/// any deeper (bullet / code) indentation intact, and drop leading and +/// trailing blank lines. Interior blank lines are preserved. 
+fn dedent_block(lines: &[String]) -> Vec { + let mut out: Vec = lines + .iter() + .map(|l| l.strip_prefix(" ").unwrap_or(l).trim_end().to_string()) + .collect(); + while out.first().is_some_and(|l| l.is_empty()) { + out.remove(0); + } + while out.last().is_some_and(|l| l.is_empty()) { + out.pop(); + } + out +} + +// --------------------------------------------------------------------------- +// Arguments. +// --------------------------------------------------------------------------- + +/// Group the (already de-indented) argument-section lines into blocks +/// — runs of consecutive non-blank lines — then turn each block into +/// an [`Argument`]. Blocks that aren't an argument header (`Note:`, +/// `Tip:`, free prose) are folded into the preceding argument's +/// description. +fn parse_arguments(lines: &[String]) -> Vec { + let mut args: Vec = Vec::new(); + for block in blocks(lines) { + let first = &block[0]; + match parse_arg_header(first) { + Some(header) => { + let mut description = Vec::new(); + let head = header.prose.trim().to_string(); + if !head.is_empty() { + description.push(head); + } + for line in &block[1..] { + description.push(line.trim().to_string()); + } + args.push(Argument { + kind: header.kind, + // Provisional: the positional's placeholder name, or + // empty for a flag. `finalize_arguments` sanitizes + // and de-collides it into the final identifier. + ident: header.name_hint.unwrap_or_default(), + flag: header.flag, + required: header.required, + synthesized: false, + description, + }); + } + None => { + // A `Note:` / `Tip:` / prose continuation block. Attach + // it to the most recent argument, separated by a blank + // line, so the context survives into hover. + if let Some(prev) = args.last_mut() { + prev.description.push(String::new()); + for line in &block { + prev.description.push(line.trim().to_string()); + } + } + } + } + } + args +} + +/// Split lines into blocks of consecutive non-empty lines. 
+fn blocks(lines: &[String]) -> Vec> { + let mut out: Vec> = Vec::new(); + let mut cur: Vec = Vec::new(); + for line in lines { + if line.trim().is_empty() { + if !cur.is_empty() { + out.push(std::mem::take(&mut cur)); + } + } else { + cur.push(line.clone()); + } + } + if !cur.is_empty() { + out.push(cur); + } + out +} + +/// The structured outcome of parsing an argument header's first line. +struct ArgHeader { + kind: ArgKind, + /// Flag name without the dash, or `None` for a positional. + flag: Option, + /// A positional's identifier hint, recovered from its `` + /// (e.g. `` → `hw_sio_linkgroups`). `None` for a + /// flag, whose identifier comes from its flag name. + name_hint: Option, + required: bool, + /// The description text that followed the ` - ` separator on the + /// header line. + prose: String, +} + +/// Parse the first line of an argument block. Returns `None` when the +/// line is not a flag/positional header (so the caller treats the +/// block as a note attached to the previous argument). +fn parse_arg_header(line: &str) -> Option { + let mut input = line; + if let Ok(flag) = flag_lead.parse_next(&mut input) { + let (placeholder, prose) = split_separator(input); + let kind = if placeholder.trim().is_empty() { + ArgKind::Boolean + } else { + ArgKind::Value + }; + return Some(ArgHeader { + kind, + flag: Some(flag.to_string()), + name_hint: None, + required: is_required(&prose), + prose, + }); + } + if let Ok(inner) = positional_lead.parse_next(&mut input) { + let (_ellipsis, prose) = split_separator(input); + return Some(ArgHeader { + kind: ArgKind::Positional, + flag: None, + name_hint: first_ident_token(inner), + required: is_required(&prose), + prose, + }); + } + None +} + +/// The first `[A-Za-z_][A-Za-z0-9_]*` token in a positional's +/// placeholder text (`hw_sio_linkgroups`, `arg1 arg2 ...` → `arg1`). +/// `None` when the placeholder has no identifier-shaped run (`[0:750]`). 
+fn first_ident_token(inner: &str) -> Option { + let mut chars = inner.char_indices().peekable(); + while let Some(&(start, c)) = chars.peek() { + if c.is_ascii_alphabetic() || c == '_' { + let mut end = start; + for (i, c) in inner[start..].char_indices() { + if c.is_ascii_alphanumeric() || c == '_' { + end = start + i + c.len_utf8(); + } else { + break; + } + } + return Some(inner[start..end].to_string()); + } + chars.next(); + } + None +} + +/// `-ident` — consume the dash and flag name, leaving the rest of the +/// line in `input`. Returns the flag name without the dash. +fn flag_lead<'s>(input: &mut &'s str) -> ModalResult<&'s str> { + preceded('-', ident).parse_next(input) +} + +/// `<...>` (with an optional trailing `...`) — consume the angle-bracket +/// placeholder that introduces a positional operand, leaving the rest +/// of the line in `input`. Returns the text inside the brackets. +fn positional_lead<'s>(input: &mut &'s str) -> ModalResult<&'s str> { + '<'.parse_next(input)?; + let inner = take_while(0.., |c: char| c != '>').parse_next(input)?; + '>'.parse_next(input)?; + let _ = opt("...").parse_next(input)?; + // Note: do not consume the space after `>` — it is part of the + // ` - ` separator that `split_separator` looks for. + Ok(inner) +} + +/// An htcl-identifier run: `[A-Za-z0-9_]+`. +fn ident<'s>(input: &mut &'s str) -> ModalResult<&'s str> { + take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_') + .parse_next(input) +} + +/// Split a header remainder on its first ` - ` separator into the +/// (placeholder, description) halves. With no separator the whole +/// remainder is taken as the description (and the placeholder is +/// empty), which makes a bare `-flag` a boolean toggle. 
+fn split_separator(rest: &str) -> (String, String) { + match rest.find(" - ") { + Some(idx) => ( + rest[..idx].trim().to_string(), + rest[idx + 3..].trim().to_string(), + ), + None => (String::new(), rest.trim().to_string()), + } +} + +/// Whether an argument's description marks it `(Required)`. +fn is_required(prose: &str) -> bool { + prose + .trim_start() + .to_ascii_lowercase() + .starts_with("(required") +} + +// --------------------------------------------------------------------------- +// See Also. +// --------------------------------------------------------------------------- + +/// Extract the command names from `See Also` bullet lines +/// (` * get_clocks`). +fn parse_see_also(lines: &[String]) -> Vec { + let mut out = Vec::new(); + for line in lines { + if let Ok(name) = see_also_entry.parse_next(&mut line.as_str()) { + if !name.is_empty() { + out.push(name.to_string()); + } + } + } + out +} + +/// ` * ` — a See-Also bullet. Returns the command name. +fn see_also_entry<'s>(input: &mut &'s str) -> ModalResult<&'s str> { + space0.parse_next(input)?; + '*'.parse_next(input)?; + space0.parse_next(input)?; + take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_') + .parse_next(input) +} + +// --------------------------------------------------------------------------- +// Finalization: identifiers, de-collision, synthesized operands. +// --------------------------------------------------------------------------- + +/// Assign final htcl identifiers, de-collide duplicates, and synthesize +/// a generic trailing operand when the page documented no positional. 
+fn finalize_arguments(page: &mut ManPage) { + let mut used: std::collections::HashSet = + std::collections::HashSet::new(); + let mut has_positional = false; + + let args = std::mem::take(&mut page.arguments); + for mut arg in args { + let base = match arg.kind { + ArgKind::Positional => { + has_positional = true; + if arg.ident.is_empty() { + "operands".to_string() + } else { + arg.ident.clone() + } + } + _ => arg.flag.clone().unwrap_or_else(|| "arg".to_string()), + }; + let base = sanitize_ident(&base); + // A duplicate flag (the page listing `-foo` twice) is dropped; + // a positional that collides with a flag is renamed. + if used.contains(&base) { + if arg.is_flag() { + continue; + } + arg.ident = unique_ident(&base, &mut used); + } else { + used.insert(base.clone()); + arg.ident = base; + } + page.arguments.push(arg); + } + + if !has_positional { + let ident = unique_ident("operands", &mut used); + page.arguments.push(Argument { + kind: ArgKind::Positional, + ident, + flag: None, + required: false, + synthesized: true, + description: vec![ + "Positional operands passed through to the underlying \ + command (object patterns, names, files, …)." + .to_string(), + ], + }); + } +} + +/// First unused identifier in the `base`, `base_2`, `base_3`, … family. +fn unique_ident( + base: &str, + used: &mut std::collections::HashSet, +) -> String { + let base = sanitize_ident(base); + if used.insert(base.clone()) { + return base; + } + for n in 2.. { + let candidate = format!("{base}_{n}"); + if used.insert(candidate.clone()) { + return candidate; + } + } + unreachable!("exhausted identifier suffixes") +} + +/// Coerce an arbitrary string into the htcl proc-arg grammar +/// (`[A-Za-z_][A-Za-z0-9_]*`). Non-conforming characters become +/// underscores; a digit-leading or empty result gets a leading +/// underscore. Never produces the Tcl-reserved varargs name `args`. 
+fn sanitize_ident(s: &str) -> String { + let mut out = String::with_capacity(s.len() + 1); + for c in s.chars() { + if c.is_ascii_alphanumeric() || c == '_' { + out.push(c); + } else { + out.push('_'); + } + } + let needs_lead = out + .as_bytes() + .first() + .map(|b| b.is_ascii_digit()) + .unwrap_or(true); + if needs_lead { + out.insert(0, '_'); + } + if out == "args" { + out = "args_".to_string(); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + fn header(line: &str) -> ArgHeader { + parse_arg_header(line).expect("expected an argument header") + } + + #[test] + fn classifies_value_flag() { + let h = header("-fileset - (Optional) The fileset."); + assert_eq!(h.kind, ArgKind::Value); + assert_eq!(h.flag.as_deref(), Some("fileset")); + assert!(!h.required); + assert!(h.prose.starts_with("(Optional)")); + } + + #[test] + fn classifies_boolean_flag() { + let h = header("-norecurse - (Optional) Do not recurse."); + assert_eq!(h.kind, ArgKind::Boolean); + assert_eq!(h.flag.as_deref(), Some("norecurse")); + } + + #[test] + fn required_value_flag() { + let h = header("-period - (Required) The period."); + assert_eq!(h.kind, ArgKind::Value); + assert!(h.required); + } + + #[test] + fn multiword_placeholder_is_value_flag() { + let h = header("-waveform - (Optional) Edges."); + assert_eq!(h.kind, ArgKind::Value); + } + + #[test] + fn classifies_positional_and_recovers_name() { + let h = header(" - (Required) Objects to remove."); + assert_eq!(h.kind, ArgKind::Positional); + assert_eq!(h.name_hint.as_deref(), Some("hw_sio_linkgroups")); + assert!(h.required); + } + + #[test] + fn positional_without_marker_is_optional() { + let h = header(" - Version of the library."); + assert_eq!(h.kind, ArgKind::Positional); + assert!(!h.required); + } + + #[test] + fn non_header_block_is_rejected() { + assert!(parse_arg_header("Note: this is a note.").is_none()); + assert!(parse_arg_header("Plain prose continuation.").is_none()); + } + + #[test] + fn 
first_ident_token_extraction() { + assert_eq!(first_ident_token("name").as_deref(), Some("name")); + assert_eq!(first_ident_token("arg1 arg2 ...").as_deref(), Some("arg1")); + assert_eq!(first_ident_token("[0:750]"), None); + } + + #[test] + fn sanitize_ident_never_yields_varargs() { + assert_eq!(sanitize_ident("args"), "args_"); + assert_eq!(sanitize_ident("64bit"), "_64bit"); + assert_eq!(sanitize_ident("a-b.c"), "a_b_c"); + } + + #[test] + fn de_collides_positional_against_flag() { + // A flag `-name` and a positional `` must not collide. + let page = parse_man_page( + "demo", + "\nArguments:\n\n -name - (Optional) The flag.\n\n \ + - (Required) The operand.\n", + ); + let idents: Vec<&str> = + page.arguments.iter().map(|a| a.ident.as_str()).collect(); + assert!(idents.contains(&"name")); + assert!(idents.contains(&"name_2")); + } + + #[test] + fn drops_duplicate_flag() { + let page = parse_man_page( + "demo", + "\nArguments:\n\n -quiet - (Optional) Quietly.\n\n \ + -quiet - (Optional) Quietly again.\n", + ); + let quiets = + page.arguments.iter().filter(|a| a.ident == "quiet").count(); + assert_eq!(quiets, 1); + } +} diff --git a/vw-htcl-cmd/tests/generate.rs b/vw-htcl-cmd/tests/generate.rs new file mode 100644 index 0000000..2794570 --- /dev/null +++ b/vw-htcl-cmd/tests/generate.rs @@ -0,0 +1,310 @@ +// Integration tests: parse synthetic and real man pages, then prove the +// generated htcl re-parses cleanly through `vw_htcl` (the same parser +// `vw check` and the LSP use). + +use std::path::Path; + +use vw_htcl_cmd::{generate, parse_man_page, ArgKind, GenerateOptions}; + +/// A man page exercising every argument shape: required value flag, +/// optional value flag, boolean flag, a multi-word placeholder, +/// required positional, optional positional, and a `Note:` block that +/// must fold into the preceding argument. +const SAMPLE: &str = " +Description: + + Creates a thing. Pass it a list like {a b c} and it just works. 
+ + Returns the created thing, or an error if it fails. + +Arguments: + + -period - (Required) The period, must be > 0. + + -name - (Optional) The name of the thing. + + -waveform - (Optional) Edge times. + + -add - (Optional) Add instead of replace. + + -quiet - (Optional) Execute quietly. + + Note: errors on the command line are still returned. + + - (Required) The source objects. + +Examples: + + make_thing -period 10 + +See Also: + + * destroy_thing + * get_things +"; + +fn assert_reparses(htcl: &str) { + let parsed = vw_htcl::parse(htcl); + assert!( + parsed.errors.is_empty(), + "generated htcl failed to parse: {:#?}\n---\n{htcl}", + parsed.errors + ); +} + +#[test] +fn parses_every_argument_shape() { + let page = parse_man_page("make_thing", SAMPLE); + + assert_eq!(page.name, "make_thing"); + assert_eq!(page.see_also, vec!["destroy_thing", "get_things"]); + + let by_ident = |id: &str| { + page.arguments + .iter() + .find(|a| a.ident == id) + .unwrap_or_else(|| panic!("missing arg {id}")) + }; + + let period = by_ident("period"); + assert_eq!(period.kind, ArgKind::Value); + assert!(period.required); + + let name = by_ident("name"); + assert_eq!(name.kind, ArgKind::Value); + assert!(!name.required); + + let waveform = by_ident("waveform"); + assert_eq!(waveform.kind, ArgKind::Value, "multi-word placeholder"); + + let add = by_ident("add"); + assert_eq!(add.kind, ArgKind::Boolean); + + // The `Note:` block must have folded into -quiet's description. + let quiet = by_ident("quiet"); + assert!( + quiet + .description + .iter() + .any(|l| l.contains("still returned")), + "Note block did not fold into -quiet: {:?}", + quiet.description + ); + + let objects = by_ident("objects"); + assert_eq!(objects.kind, ArgKind::Positional); + assert!(objects.required); + // A positional was documented, so none is synthesized. 
+ assert!(page.arguments.iter().all(|a| !a.synthesized)); +} + +#[test] +fn generated_wrapper_reparses() { + let page = parse_man_page("make_thing", SAMPLE); + let htcl = generate(&page, &GenerateOptions::default()); + + // Doc braces are neutralized so the arg-list brace match survives. + assert!( + htcl.contains("{a b c}".replace('{', "(").replace('}', ")").as_str()) + ); + // Natural name + extern-prefixed forward (lowering autogen + // produces the rename plumbing at session startup). + assert!(htcl.contains("proc make_thing {")); + assert!(!htcl.contains("rename")); + // New body shape: non-typed args accumulate into `flags` via + // lappend (safe — strings only). Typed args (objects, cell, + // pin, ...) are passed directly via `-flag $value` so Vivado's + // typed Tcl_Obj survives. + assert!( + htcl.contains("lappend flags -period $period"), + "non-typed value-flag should lappend into flags: {htcl}" + ); + assert!( + htcl.contains("if {$add} { lappend flags -add }"), + "boolean should lappend into flags only when true: {htcl}" + ); + // `objects` is in TYPED_ARG_NAMES → must NOT be lappended + // (would shimmer the typed handle). Must appear in a direct + // invocation as `-objects $objects` or as positional `$objects`. 
+ assert!( + !htcl.contains("lappend flags {*}$objects"), + "typed arg `objects` must not be lappended: {htcl}" + ); + assert!( + !htcl.contains("lappend cmd"), + "old `cmd`-accumulator shape should be gone: {htcl}" + ); + assert!( + htcl.contains("extern::make_thing {*}$flags"), + "direct invocation with {{*}}$flags expected: {htcl}" + ); + assert!( + htcl.contains("$objects"), + "objects must be referenced in the invocation: {htcl}" + ); + + assert_reparses(&htcl); +} + +#[test] +fn synthesizes_operand_when_no_positional() { + let page = parse_man_page( + "current_thing", + "\nDescription:\n\n Gets the current thing.\n\nArguments:\n\n \ + -quiet - (Optional) Quietly.\n", + ); + let synth: Vec<_> = + page.arguments.iter().filter(|a| a.synthesized).collect(); + assert_eq!(synth.len(), 1, "exactly one synthesized operand"); + assert_eq!(synth[0].kind, ArgKind::Positional); + assert!(!synth[0].required); + + assert_reparses(&generate(&page, &GenerateOptions::default())); +} + +#[test] +fn empty_man_page_still_generates_valid_wrapper() { + // No Description, no Arguments — the generator must still emit a + // parseable, self-contained wrapper. + let page = parse_man_page("noop", ""); + let htcl = generate(&page, &GenerateOptions::default()); + assert!(htcl.contains("proc noop {")); + assert_reparses(&htcl); +} + +/// Smoke test over the real Vivado man pages when a local install is +/// present: every page must generate htcl that re-parses cleanly. 
+#[test] +fn real_man_pages_reparse() { + let dir = "/home/ry/Xilinx/2025.1/Vivado/doc/eng/man"; + if !Path::new(dir).exists() { + eprintln!("skipping: {dir} not present"); + return; + } + let mut checked = 0; + let mut failures = Vec::new(); + let mut stack = vec![std::path::PathBuf::from(dir)]; + while let Some(d) = stack.pop() { + for entry in std::fs::read_dir(&d).unwrap().flatten() { + let path = entry.path(); + if path.is_dir() { + stack.push(path); + continue; + } + let stem = match path.file_name().and_then(|s| s.to_str()) { + Some(s) + if s.chars().all(|c| { + c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' + }) => + { + s + } + _ => continue, // skip tmp.* / *_Copy junk + }; + let text = std::fs::read_to_string(&path).unwrap(); + let page = parse_man_page(stem, &text); + let htcl = generate(&page, &GenerateOptions::default()); + let parsed = vw_htcl::parse(&htcl); + if !parsed.errors.is_empty() { + failures.push(format!("{stem}: {:?}", parsed.errors)); + } + checked += 1; + } + } + eprintln!("checked {checked} real man pages"); + assert!(checked > 500, "expected many man pages, saw {checked}"); + assert!( + failures.is_empty(), + "{} man pages produced unparseable htcl:\n{}", + failures.len(), + failures.join("\n") + ); +} + +// --- return-type emission (step 5) ----------------------------------------- + +#[test] +fn returns_section_emits_type_annotation() { + let src = " +Description: + + Returns a list of cells matching the search. + +Arguments: + + -hierarchical - (Optional) Search hierarchically. + +Returns: + + a list of cells + +See Also: + + * get_cells +"; + let page = parse_man_page("get_things", src); + assert!(page.returns.is_some(), "Returns: section should be parsed"); + let htcl = generate(&page, &GenerateOptions::default()); + assert_reparses(&htcl); + // The `proc get_things { … } list { … }` shape. 
+ assert!( + htcl.contains("list {"), + "expected return-type annotation in: {htcl}" + ); +} + +#[test] +fn returns_section_nothing_emits_unit() { + let src = " +Description: + + Sets things. + +Arguments: + + -quiet - (Optional) Quiet. + +Returns: + + Returns nothing. + +See Also: + + * unset_things +"; + let page = parse_man_page("set_things", src); + let htcl = generate(&page, &GenerateOptions::default()); + assert_reparses(&htcl); + assert!( + htcl.contains(" unit {"), + "expected `unit` return annotation in: {htcl}" + ); +} + +#[test] +fn page_without_returns_section_emits_no_annotation() { + let src = " +Description: + + Does a thing. + +Arguments: + + -x - (Required) Thing. + +See Also: + + * other +"; + let page = parse_man_page("do_a_thing", src); + assert!(page.returns.is_none()); + let htcl = generate(&page, &GenerateOptions::default()); + assert_reparses(&htcl); + // No return-type annotation present. + assert!( + !htcl.contains("unit {") + && !htcl.contains("bd_cell {") + && !htcl.contains("list<"), + "unannotated page should not synthesize a return type: {htcl}" + ); +} diff --git a/vw-htcl/Cargo.toml b/vw-htcl/Cargo.toml new file mode 100644 index 0000000..91aecab --- /dev/null +++ b/vw-htcl/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "vw-htcl" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "htcl language layer: parser, AST, name resolution, signature checking, TCL emission" + +[dependencies] +serde.workspace = true +thiserror.workspace = true +winnow.workspace = true +camino.workspace = true +vw-quote = { path = "../vw-quote" } + +[dev-dependencies] +tempfile.workspace = true diff --git a/vw-htcl/src/ast.rs b/vw-htcl/src/ast.rs new file mode 100644 index 0000000..d8a685f --- /dev/null +++ b/vw-htcl/src/ast.rs @@ -0,0 +1,500 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Concrete syntax tree for htcl. +//! +//! Every node carries a [`Span`] so the same tree drives diagnostics, +//! hover, navigation, and source-faithful lowering back to TCL. The +//! tree is concrete in the sense that it retains enough information to +//! recover the original source (comments, blank lines, word forms); +//! later passes derive a stripped AST for analysis. + +use crate::span::Span; + +#[derive(Clone, Debug)] +pub struct Document { + pub stmts: Vec, + pub span: Span, +} + +// `Stmt::Command(Command)` is ~320 bytes while the other variants +// are <50; clippy flags the size disparity and suggests boxing +// `Command`. We don't box because: +// - Commands are by far the most common variant (often >95% of +// Stmt instances in real source), so the boxed-pointer +// indirection on the hot path would cost more than the +// wasted bytes in rare Comment/Error variants. +// - Boxing would ripple through ~50 pattern-match sites +// (`let Stmt::Command(cmd) = ...`) and complicate the AST's +// "by-value clone-and-mutate" rewrite passes. +#[derive(Clone, Debug)] +#[allow(clippy::large_enum_variant)] +pub enum Stmt { + Command(Command), + Comment(Comment), + Error(ParseFailure), +} + +impl Stmt { + pub fn span(&self) -> Span { + match self { + Stmt::Command(c) => c.span, + Stmt::Comment(c) => c.span, + Stmt::Error(e) => e.span, + } + } +} + +/// A single TCL command — a whitespace-separated sequence of words, +/// terminated by newline, semicolon, or EOF. +#[derive(Clone, Debug)] +pub struct Command { + pub words: Vec, + pub span: Span, + pub kind: CommandKind, + /// Doc comments (`##`) immediately preceding the command, in + /// source order with the `##` prefix stripped. + pub doc_comments: Vec, +} + +/// Recognized command shapes. 
Generic covers any unrecognized command; +/// specific variants exist so downstream passes (symbol tables, the +/// LSP, the structured-proc work in Phase 2) can act on them without +/// re-parsing. +#[derive(Clone, Debug)] +pub enum CommandKind { + Generic, + Set, + Proc(Proc), + Src(SrcImport), + NamespaceEval(NamespaceEval), + /// A `type = ` declaration. Compile-time only + /// — never lowered to Tcl. Together with the required + /// `::repr` / `from` / `to` procs (enforced by the + /// validator), introduces a new newtype the rest of the program + /// can reference in return-type annotations and (later) arg + /// annotations. + TypeDecl(TypeDecl), + /// An `enum = { }` declaration. Compile-time + /// only — the lowerer emits the auto-generated constructor / + /// repr / accessor procs through the repr-codegen path, NOT via + /// shipping the source verbatim. Variants are + /// `IDENT (':' TYPE)?`; the optional `:TYPE` payload makes + /// empty-payload variants first-class. + EnumDecl(EnumDecl), +} + +/// A `namespace eval { }` block. +/// +/// Recognized at parse time so that any `proc` declarations inside +/// the braces register in the document's signature table under the +/// qualified name `::` (Tcl namespace semantics), and +/// the analyzer can offer the same hover / completion / signature +/// help / goto experience for namespaced procs as for top-level +/// ones. The body parses as a script just like a proc body, so +/// nested `namespace eval` blocks compose. +#[derive(Clone, Debug)] +pub struct NamespaceEval { + /// Bare-text namespace name when extractable (the common case), + /// `None` when the name word couldn't be reduced to literal text + /// (e.g. it contains substitutions). Multi-segment names like + /// `foo::bar` are preserved as-is and the analyzer uses them as + /// the full prefix. + pub name: Option, + pub name_span: Span, + pub body_span: Span, + /// The body parsed into statements. 
Spans are absolute (whole- + /// source) coordinates, same convention as [`Proc::body`]. + pub body: Vec, +} + +/// A `src ` import — load and evaluate another htcl module. +/// +/// The path's *form* is classified at load time, not here: leading +/// `@name/` resolves through the workspace's `vw.toml` dependencies, +/// a leading `/` is filesystem-absolute, anything else is relative to +/// the importing file's directory. `path` is `None` only when the +/// path word couldn't be reduced to literal text (e.g. it contains +/// `$var` / `[cmd]` substitutions); those imports are diagnosed +/// downstream rather than parsed structurally. +#[derive(Clone, Debug)] +pub struct SrcImport { + pub path: Option, + pub path_span: Span, +} + +/// A `proc` declaration. +/// +/// The outer shape (name, args span, body span) comes from the Phase 0 +/// parser. The structured args grammar (Phase 2) is reparsed from +/// `args_span` and stored in [`signature`](Self::signature). When +/// `signature` is `None` the args body couldn't be parsed at all +/// (e.g. mid-edit syntax error); diagnostics for that live in the +/// document's parse-error list. +#[derive(Clone, Debug)] +pub struct Proc { + /// Bare-text proc name when it could be extracted; `None` for + /// programmatically-named procs (e.g. names built from + /// substitution). + pub name: Option, + pub name_span: Span, + pub args_span: Span, + pub body_span: Span, + pub signature: Option, + /// Optional return-type annotation: the 4th word of a + /// `proc NAME { args } TYPE { body }` declaration, parsed as a + /// [`TypeExpr`]. `None` means "no annotation present" — the + /// proc still works, but downstream type-driven machinery + /// (REPL repr printer, hover, future call-site validation) + /// falls back to its untyped path. Bracketed forms like + /// `{dict}` are unwrapped before type-parsing. + pub return_type: Option, + /// Source span of the 4th-word type slot when present; `None` + /// when the proc has no annotation. 
The span covers the outer + /// word including any wrapping braces, so diagnostics can + /// underline the entire annotation. + pub return_type_span: Option, + /// The body parsed into statements, with spans in absolute + /// (whole-source) coordinates. Populated by a post-pass after the + /// outer parse; empty until then and for bodies that are pure + /// braced text with no commands. Lowering still ships the body + /// verbatim from [`body_span`](Self::body_span) — this field + /// exists so navigation, hover, and analysis can see *into* a + /// proc body. Nested procs declared here have their own `body` + /// populated recursively. + pub body: Vec, +} + +/// A `type NAME = UNDERLYING` declaration. +/// +/// Introduces a newtype wrapper around an existing type. The +/// validator requires the user to ALSO define three procs in the +/// `::` namespace: `repr` (rendering to a `string`), `from` +/// (lifting an underlying value, with optional validation), and +/// `to` (extracting the underlying value). See the validator for +/// the exact signature shapes enforced. +/// +/// Compile-time only — never lowered to Tcl. The newtype's runtime +/// representation is identical to the underlying type; the +/// distinction lives entirely in the analyzer / printer / future +/// type-checker. +#[derive(Clone, Debug)] +pub struct TypeDecl { + /// Bare-text type name when extractable; `None` for + /// programmatically-named declarations (vanishingly rare; + /// kept consistent with [`Proc::name`]'s convention). + pub name: Option, + pub name_span: Span, + /// The underlying type, parsed from the right-hand side of `=`. + /// `None` when the right-hand side couldn't be parsed as a + /// type expression (e.g. mid-edit). Diagnostics for that live + /// in the document's parse-error list. + pub underlying: Option, + /// Span of the underlying-type word (outer, including any + /// wrapping braces) so diagnostics can underline it. 
+ pub underlying_span: Span, +} + +/// An `enum = { }` declaration. The body is +/// brace-wrapped and newline-separated; each variant is +/// `IDENT (':' TYPE)?`. +/// +/// Compile-time only — codegen lowers this to a `namespace eval +/// { … }` block containing auto-generated constructors, +/// `repr`/`from`/`to`, and `tag`/`payload` accessors. The +/// validator enforces variant-name uniqueness within an enum and +/// that variant payload types reference known type names. +#[derive(Clone, Debug)] +pub struct EnumDecl { + /// Bare-text enum name when extractable. + pub name: Option, + pub name_span: Span, + /// Declared variants, in source order. + pub variants: Vec, + /// Span of the brace-wrapped variants block (outer, including + /// the braces) so diagnostics can underline it. + pub body_span: Span, +} + +/// One variant inside an [`EnumDecl`]. `payload` is `None` for +/// empty-payload variants (e.g. `North` in `enum Direction = { +/// North; South: int }`). +#[derive(Clone, Debug)] +pub struct EnumVariant { + pub name: String, + pub name_span: Span, + pub payload: Option, + /// Span of the payload-type word, or zero-length at the + /// variant's end position for empty-payload variants. + pub payload_span: Span, + /// Span covering the full `NAME (':' TYPE)?` form. + pub span: Span, +} + +/// Side-table entry produced by the validator's overload-classifier +/// pass: for a public proc name that resolved to an enum-overload +/// set, records which enum drives dispatch and where each variant's +/// specialization lives. The codegen step uses this to synthesize +/// the public dispatcher proc; the analyzer uses it to render +/// overload information in hover / signature help. +#[derive(Clone, Debug)] +pub struct OverloadInfo { + /// The public, user-facing proc name (e.g. `handle_prop`). + pub public_name: String, + /// The enum this overload set dispatches on (e.g. `Property`). + pub enum_name: String, + /// Shared arg name across all overload arms (e.g. `v`). 
The + /// validator enforces every arm uses the same name so the + /// dispatcher can pass the payload via the kwargs protocol + /// (`- `) without per-arm gymnastics. + pub dispatch_arg_name: String, + /// One entry per variant, in declaration order on the enum. + pub variants: Vec, + /// Span on the first overload's name — used as the diagnostic + /// anchor for overload-set-wide errors. + pub anchor_span: Span, +} + +#[derive(Clone, Debug)] +pub struct OverloadVariant { + /// The variant short-name (e.g. `Scalar`, `Nested`). + pub variant_name: String, + /// The mangled internal proc name the specialization runs + /// under at runtime (e.g. `__handle_prop__Scalar`). + pub mangled_proc_name: String, + /// Span of the variant-arg annotation on the specialization's + /// first argument — diagnostic anchor when something's + /// specifically wrong with this arm. + pub dispatch_arg_span: Span, +} + +/// A type expression — the syntactic form of a type used in +/// `proc NAME { args } TYPE { body }` return annotations and on the +/// right-hand side of `type NAME = TYPE` declarations. +/// +/// Newtypes (`bd_cell`, `widget`, user inventions) and primitives +/// (`string`, `int`, `bool`, `unit`) share the [`Named`] variant — +/// the distinction lives in the validator's type table, not the +/// AST. Containers (`list`, `dict`, and any future shape +/// with the same `name` surface) are [`Generic`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TypeExpr { + Named { + name: String, + span: Span, + }, + Generic { + name: String, + name_span: Span, + args: Vec, + /// Full span including `<` … `>`. + span: Span, + }, + /// `Enum::Variant` — a qualified path naming a single variant + /// of a declared enum. Only legal as an arg-type annotation on + /// an overloaded handler proc (the dispatch indicator); the + /// validator rejects this variant anywhere else (return types, + /// generic args, nested positions). 
+ Qualified { + namespace: String, + variant: String, + namespace_span: Span, + variant_span: Span, + /// Full span covering `namespace::variant`. + span: Span, + }, +} + +impl TypeExpr { + pub fn name(&self) -> &str { + match self { + TypeExpr::Named { name, .. } | TypeExpr::Generic { name, .. } => { + name + } + TypeExpr::Qualified { namespace, .. } => namespace, + } + } + + pub fn span(&self) -> Span { + match self { + TypeExpr::Named { span, .. } + | TypeExpr::Generic { span, .. } + | TypeExpr::Qualified { span, .. } => *span, + } + } +} + +/// Structured proc-argument signature. +/// +/// One entry per declared argument, in source order. The order is the +/// canonical positional order used when lowering keyword-arg call +/// sites to Tcl-positional calls for the EDA backend. +#[derive(Clone, Debug)] +pub struct ProcSignature { + pub args: Vec, + pub span: Span, + /// The declared return type, copied here from [`Proc::return_type`] + /// at parse time so the signature-table-based lookup paths + /// (REPL formatter, hover) don't have to re-walk back to the + /// Proc node. `None` for unannotated procs. + pub return_type: Option, +} + +impl ProcSignature { + pub fn find(&self, name: &str) -> Option<&ProcArg> { + self.args.iter().find(|a| a.name == name) + } +} + +#[derive(Clone, Debug)] +pub struct ProcArg { + pub name: String, + pub name_span: Span, + pub doc_comments: Vec, + pub attributes: Vec, + /// Optional `: TYPE` annotation on the arg. `Some` when the + /// source carries `name: bd_cell` style; `None` when the arg + /// is untyped (the legacy form). Used by the validator (full + /// shape check on newtype repr/from/to procs) and by the + /// analyzer's hover / signature-help displays. + pub type_annotation: Option, + pub span: Span, +} + +impl ProcArg { + pub fn attribute(&self, name: &str) -> Option<&Attribute> { + self.attributes.iter().find(|a| a.name == name) + } +} + +/// Raw attribute as parsed: name plus zero or more comma-separated +/// values. 
Semantic interpretation (default, required, enum, range, +/// requires, conflicts, deprecated) lives in the validators, not +/// here — keeping the AST shape unopinionated lets new attribute +/// names land without a parser change. +#[derive(Clone, Debug)] +pub struct Attribute { + pub name: String, + pub name_span: Span, + pub values: Vec, + pub span: Span, +} + +#[derive(Clone, Debug)] +pub enum AttributeValue { + Integer { value: i64, span: Span }, + String { value: String, span: Span }, + Ident { value: String, span: Span }, +} + +impl AttributeValue { + pub fn span(&self) -> Span { + match self { + AttributeValue::Integer { span, .. } + | AttributeValue::String { span, .. } + | AttributeValue::Ident { span, .. } => *span, + } + } + + /// Render the value back to a Tcl-style literal, suitable for + /// comparison against a runtime arg or for emitting in lowered + /// Tcl. Integers and idents stringify as-is; strings get + /// double-quoted with naive escaping. + pub fn to_tcl_literal(&self) -> String { + match self { + AttributeValue::Integer { value, .. } => value.to_string(), + AttributeValue::Ident { value, .. } => value.clone(), + AttributeValue::String { value, .. } => { + let escaped = value.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{escaped}\"") + } + } + } + + pub fn as_str(&self) -> &str { + match self { + AttributeValue::Ident { value, .. } + | AttributeValue::String { value, .. } => value, + AttributeValue::Integer { .. } => "", + } + } +} + +#[derive(Clone, Debug)] +pub struct Comment { + /// Comment text with the leading `#` removed; for `##` doc + /// comments, both `#`s are removed. 
+ pub text: String, + pub span: Span, + pub is_doc: bool, +} + +#[derive(Clone, Debug)] +pub struct ParseFailure { + pub message: String, + pub span: Span, +} + +#[derive(Clone, Debug)] +pub struct Word { + pub form: WordForm, + pub parts: Vec, + pub span: Span, +} + +impl Word { + /// If this word is a single literal text part (no interpolation), + /// return its value. Useful for matching command names, fixed + /// keywords, and option flags without rebuilding the string. + pub fn as_text(&self) -> Option<&str> { + match self.parts.as_slice() { + [WordPart::Text { value, .. }] => Some(value), + _ => None, + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum WordForm { + Bare, + Quoted, + Braced, +} + +#[derive(Clone, Debug)] +pub enum WordPart { + Text { + value: String, + span: Span, + }, + VarRef { + name: String, + span: Span, + }, + /// `[ cmd ... ]` command substitution. `source` is the raw interior + /// text (between the brackets) and `span` covers the whole + /// `[...]`. `body` is populated by a post-pass that recursively + /// parses the interior into statements with absolute spans, so + /// hover / goto / signature-help can descend in. + CmdSubst { + source: String, + span: Span, + body: Vec, + }, + Escape { + value: char, + span: Span, + }, +} + +impl WordPart { + pub fn span(&self) -> Span { + match self { + WordPart::Text { span, .. } + | WordPart::VarRef { span, .. } + | WordPart::CmdSubst { span, .. } + | WordPart::Escape { span, .. } => *span, + } + } +} diff --git a/vw-htcl/src/cmdline.rs b/vw-htcl/src/cmdline.rs new file mode 100644 index 0000000..f6bf56a --- /dev/null +++ b/vw-htcl/src/cmdline.rs @@ -0,0 +1,228 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Lightweight analysis of the partially-typed command at the cursor. +//! +//! 
Completion and signature help need to know, mid-edit, which command +//! the cursor sits in and which word is being typed. The full AST is +//! unreliable here *precisely because* the text is incomplete, so we +//! scan the raw source backward to the nearest command boundary +//! (newline, `;`, or the `[` that opens a command substitution) and +//! tokenize on whitespace. This is a deliberately shallow Tcl reader — +//! good enough to drive IDE affordances, not to execute. + +use crate::span::Span; + +#[derive(Clone, Debug)] +pub struct CmdLine<'a> { + /// Whitespace-separated complete words before the cursor. The + /// first, when present, is the command name. + pub words: Vec<&'a str>, + /// The word currently under the cursor: the trailing token when + /// the prefix doesn't end in whitespace, otherwise empty. + pub partial: &'a str, + /// Span of `partial` in the source — the range a completion should + /// replace. Zero-width (an insertion point) when `partial` is + /// empty. + pub partial_span: Span, +} + +impl CmdLine<'_> { + /// The command name (first complete word). `None` while the cursor + /// is still on the first word — i.e. command-name position. + pub fn command_name(&self) -> Option<&str> { + self.words.first().copied() + } + + /// True when the cursor is in command-name position (no complete + /// words precede it). + pub fn in_command_position(&self) -> bool { + self.words.is_empty() + } + + /// Flags (`-foo`) already supplied among the complete words after + /// the command name. + pub fn used_flags(&self) -> impl Iterator { + self.words + .iter() + .skip(1) + .copied() + .filter(|w| w.starts_with('-')) + } +} + +/// Analyze the command the cursor at `offset` is editing. +pub fn analyze(source: &str, offset: u32) -> CmdLine<'_> { + let off = (offset as usize).min(source.len()); + let bytes = source.as_bytes(); + + // Walk back to the start of the current command. 
The boundary + // depends on the cursor's *bracket nesting*: inside a `[ … ]`, + // newlines are whitespace (matching the parser), only `;` and the + // opening `[` terminate. Outside brackets, `\n` and `;` both + // terminate at the cursor's level. + // + // We track depth as we walk backward — each `]` going back means + // we're entering a deeper region, each `[` brings us back out. If + // we hit an unmatched `[` (the opening bracket of the substitution + // the cursor sits in), that's the command boundary. Otherwise the + // closest `\n`/`;` we passed at depth 0 wins. We have to scan past + // a candidate `\n`/`;` because an enclosing `[` further back would + // override it. + let mut depth: i32 = 0; + let mut nearest_top_sep: Option = None; + let mut bracket_open: Option = None; + let mut i = off; + while i > 0 { + i -= 1; + match bytes[i] { + b']' => depth += 1, + b'[' => { + if depth > 0 { + depth -= 1; + } else { + bracket_open = Some(i + 1); + break; + } + } + b'\n' | b';' if depth == 0 && nearest_top_sep.is_none() => { + nearest_top_sep = Some(i + 1); + } + _ => {} + } + } + let start = bracket_open.or(nearest_top_sep).unwrap_or(0); + let prefix = &source[start..off]; + + // The partial word is the trailing run of non-whitespace, unless + // the prefix already ends in whitespace (then we're between words). 
+ let partial_len: usize = prefix + .chars() + .rev() + .take_while(|c| !c.is_whitespace()) + .map(char::len_utf8) + .sum(); + let split = prefix.len() - partial_len; + let head = &prefix[..split]; + let partial = &prefix[split..]; + + CmdLine { + words: head.split_whitespace().collect(), + partial, + partial_span: Span::new((start + split) as u32, off as u32), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn at_end(src: &str) -> CmdLine<'_> { + analyze(src, src.len() as u32) + } + + #[test] + fn command_position_with_partial() { + let line = at_end("gr"); + assert!(line.in_command_position()); + assert_eq!(line.partial, "gr"); + assert_eq!(line.partial_span, Span::new(0, 2)); + } + + #[test] + fn argument_position_after_name() { + let line = at_end("greet "); + assert!(!line.in_command_position()); + assert_eq!(line.command_name(), Some("greet")); + assert_eq!(line.partial, ""); + assert_eq!(line.partial_span, Span::new(6, 6)); + } + + #[test] + fn partial_flag_after_name() { + let line = at_end("greet -na"); + assert_eq!(line.command_name(), Some("greet")); + assert_eq!(line.partial, "-na"); + assert_eq!(line.partial_span.slice("greet -na"), "-na"); + } + + #[test] + fn used_flags_are_reported() { + let line = at_end("f -a 1 -b "); + let used: Vec<&str> = line.used_flags().collect(); + assert_eq!(used, vec!["-a", "-b"]); + } + + #[test] + fn resets_at_command_substitution() { + // Only the text inside the `[...]` counts as the command. + let src = "puts [greet -na"; + let line = analyze(src, src.len() as u32); + assert_eq!(line.command_name(), Some("greet")); + assert_eq!(line.partial, "-na"); + } + + #[test] + fn resets_at_newline() { + let src = "set x 1\ngr"; + let line = analyze(src, src.len() as u32); + assert!(line.in_command_position()); + assert_eq!(line.partial, "gr"); + } + + #[test] + fn ignores_newlines_inside_brackets() { + // The cursor sits on `-cell ` in a multi-line `[ … ]`. 
The + // analyzer must skip the intervening newlines so it can still + // see `create_cpm5_cpm_pcie0` as the command name. + let src = "\ +set x [ + create_cpm5_cpm_pcie0 + -cell "; + let line = analyze(src, src.len() as u32); + assert_eq!(line.command_name(), Some("create_cpm5_cpm_pcie0")); + assert_eq!(line.partial, ""); + // The flag in the middle counts as already-used. + let used: Vec<&str> = line.used_flags().collect(); + assert_eq!(used, vec!["-cell"]); + } + + #[test] + fn active_partial_flag_across_lines() { + // Partial `-max_link_` typed on a fresh line of a multi-line + // bracket should still be recognized as the partial word, and + // the command name should still be the bracket's first word. + let src = "\ +set x [ + create_cpm5_cpm_pcie0 + -cell cpm5 + -max_link_"; + let line = analyze(src, src.len() as u32); + assert_eq!(line.command_name(), Some("create_cpm5_cpm_pcie0")); + assert_eq!(line.partial, "-max_link_"); + } + + #[test] + fn skips_balanced_inner_brackets() { + // Walking back past a complete `[…]` shouldn't fool the + // analyzer into thinking the cursor is at top level when it's + // really inside another, *outer* bracket. + let src = "\ +set x [ + [a b] + outer "; + let line = analyze(src, src.len() as u32); + // The cursor's enclosing bracket is the outer one; its first + // word is the standalone `[a b]` substitution, not a simple + // identifier — so command_name is None, but partial is empty + // (we're between words on a continuation line). The point is + // that the *outer* bracket is what we recognized, not the + // inner one. + let used: Vec<&str> = line.used_flags().collect(); + assert!(used.is_empty(), "{used:?}"); + // `outer` is the second word inside the outer bracket; the + // first word was the `[…]` substitution itself. 
+ assert!(line.words.contains(&"outer"), "{:?}", line.words); + } +} diff --git a/vw-htcl/src/complete.rs b/vw-htcl/src/complete.rs new file mode 100644 index 0000000..bcff0d4 --- /dev/null +++ b/vw-htcl/src/complete.rs @@ -0,0 +1,549 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Code completion for htcl. +//! +//! Two contexts, both keyed off [`cmdline::analyze`]: +//! +//! - **Command position** (typing the first word) → the names of +//! `proc`s declared in the document. +//! - **Argument position** (after a known proc's name) → that proc's +//! `-flag` arguments, minus any already supplied. +//! +//! Pure analysis: returns structured [`Completion`]s referencing the +//! document; the LSP backend maps them to `CompletionItem`s and the +//! REPL will render them its own way. Vivado builtins are not offered +//! yet — that needs the UG835 command database (project-plan Phase 8). + +use std::fmt::Write; + +use crate::ast::{ + AttributeValue, CommandKind, Document, ProcArg, ProcSignature, Stmt, +}; +use crate::cmdline::{self, CmdLine}; +use crate::span::Span; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CompletionKind { + /// A `proc` name in command position. + Proc, + /// A `-flag` keyword argument of a known proc. + Flag, + /// A value from a flag's `@enum(...)` constraint. + EnumValue, +} + +#[derive(Clone, Debug)] +pub struct Completion { + /// Text shown in the list and inserted (`greet`, `-name`). + pub label: String, + pub kind: CompletionKind, + /// Short, single-line annotation shown inline next to the label. + pub detail: Option, + /// Longer markdown shown in the item's documentation popup. + pub documentation: Option, + /// Source range the inserted text replaces (the partial word, or a + /// zero-width insertion point between words). 
+ pub replace: Span, +} + +struct ProcInfo<'a> { + /// Qualified name as it would be called — bare proc name for + /// top-level declarations, `::` for procs declared + /// inside `namespace eval` blocks. + name: String, + doc_comments: &'a [String], + signature: Option<&'a ProcSignature>, +} + +/// Completions available at `offset`. +pub fn complete_at( + document: &Document, + source: &str, + offset: u32, +) -> Vec { + // Inside a proc's argument-declaration braces, command/flag + // completion is meaningless (attribute completion will live here + // later). Stay quiet rather than offer nonsense. + if in_proc_args(&document.stmts, offset) { + return Vec::new(); + } + + let line = cmdline::analyze(source, offset); + let procs = collect_procs(document); + + if line.in_command_position() { + return complete_proc_names(&procs, &line); + } + + // If the previous complete word is a `-flag`, the cursor is in + // value position — even if the partial is empty (user just hit + // space after the flag). Offer the flag's `@enum(...)` choices + // when it has them; otherwise stay silent so the user can type a + // free-form value (string, int, etc.) without a flag list popping + // up in front of it. + // + // If the partial *starts with* `-` we step back into flag-typing + // mode regardless — the user is clearly typing a new flag. + let last_word_is_flag = line.words.len() >= 2 + && line.words.last().is_some_and(|w| w.starts_with('-')); + if last_word_is_flag && !line.partial.starts_with('-') { + return complete_enum_values(&procs, &line); + } + + complete_flags(&procs, &line) +} + +/// `@enum(…)` value completions when the cursor sits in value +/// position. Returns empty when the flag has no `@enum` (so the +/// caller can fall back to flag completion). 
+fn complete_enum_values( + procs: &[ProcInfo<'_>], + line: &CmdLine<'_>, +) -> Vec { + let Some(name) = line.command_name() else { + return Vec::new(); + }; + let Some(proc) = procs.iter().find(|p| p.name == name) else { + return Vec::new(); + }; + let Some(sig) = proc.signature else { + return Vec::new(); + }; + // The flag whose value we're completing is the last word on the + // line; if it isn't a `-flag`, the user is between options and + // there's nothing to enum-complete. + let Some(last) = line.words.last() else { + return Vec::new(); + }; + let Some(flag) = last.strip_prefix('-') else { + return Vec::new(); + }; + let Some(arg) = sig.find(flag) else { + return Vec::new(); + }; + let Some(enum_attr) = arg.attribute("enum") else { + return Vec::new(); + }; + + let needle = line.partial; + enum_attr + .values + .iter() + .filter_map(|v| { + let raw = enum_value_text(v); + // Filter by either the bare or quoted form so a user typing + // `Mas` matches the value `Master Mode` whose insert form + // is `"Master Mode"`. + if !raw.starts_with(needle) + && !quote_for_completion(&raw).starts_with(needle) + { + return None; + } + let insert = quote_for_completion(&raw); + Some(Completion { + label: insert.clone(), + kind: CompletionKind::EnumValue, + detail: Some(format!("value for -{}", arg.name)), + documentation: crate::doc::brief(&arg.doc_comments), + replace: line.partial_span, + }) + }) + .collect() +} + +fn enum_value_text(v: &AttributeValue) -> String { + match v { + AttributeValue::Integer { value, .. } => value.to_string(), + AttributeValue::Ident { value, .. } + | AttributeValue::String { value, .. } => value.clone(), + } +} + +/// Quote `s` for use as a value on a call site if it can't ride as a +/// bare word. Mirrors the rule [`crate::emit::Word::lit`] uses: bare +/// when safe, double-quoted with `\`/`"` escapes otherwise. 
+fn quote_for_completion(s: &str) -> String { + let needs = s.is_empty() + || s.chars().any(|c| { + c.is_whitespace() + || matches!( + c, + ';' | '"' | '\\' | '[' | ']' | '{' | '}' | '$' | '#' + ) + }); + if needs { + let escaped = s.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{escaped}\"") + } else { + s.to_string() + } +} + +fn complete_proc_names( + procs: &[ProcInfo<'_>], + line: &CmdLine<'_>, +) -> Vec { + procs + .iter() + .filter(|p| p.name.starts_with(line.partial)) + .map(|p| Completion { + label: p.name.to_string(), + kind: CompletionKind::Proc, + detail: first_doc_line(p.doc_comments), + documentation: proc_documentation(p), + replace: line.partial_span, + }) + .collect() +} + +fn complete_flags( + procs: &[ProcInfo<'_>], + line: &CmdLine<'_>, +) -> Vec { + let Some(name) = line.command_name() else { + return Vec::new(); + }; + let Some(proc) = procs.iter().find(|p| p.name == name) else { + return Vec::new(); + }; + let Some(sig) = proc.signature else { + return Vec::new(); + }; + + let used: Vec<&str> = line.used_flags().collect(); + let needle = line.partial; + let bare_needle = needle.trim_start_matches('-'); + + sig.args + .iter() + .filter_map(|arg| { + let label = format!("-{}", arg.name); + // Don't re-offer a flag already on the line, unless it's + // the very word being typed. + if used.iter().any(|u| *u == label) && needle != label { + return None; + } + // Match either the dashed form (`-na`) or the bare name + // (`na`); an empty needle matches everything. 
+ if !label.starts_with(needle) && !arg.name.starts_with(bare_needle) + { + return None; + } + Some(Completion { + label, + kind: CompletionKind::Flag, + detail: first_doc_line(&arg.doc_comments), + documentation: Some(arg_documentation(arg)), + replace: line.partial_span, + }) + }) + .collect() +} + +fn collect_procs(document: &Document) -> Vec> { + let mut out = Vec::new(); + collect_procs_in(&document.stmts, "", &mut out); + out +} + +fn collect_procs_in<'a>( + stmts: &'a [Stmt], + prefix: &str, + out: &mut Vec>, +) { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::Proc(proc) => { + let Some(name) = proc.name.as_deref() else { + continue; + }; + let qualified = if prefix.is_empty() { + name.to_string() + } else { + format!("{prefix}::{name}") + }; + out.push(ProcInfo { + name: qualified, + doc_comments: &cmd.doc_comments, + signature: proc.signature.as_ref(), + }); + } + CommandKind::NamespaceEval(ns) => { + let Some(name) = ns.name.as_deref() else { + continue; + }; + let nested = if prefix.is_empty() { + name.to_string() + } else { + format!("{prefix}::{name}") + }; + collect_procs_in(&ns.body, &nested, out); + } + _ => {} + } + } +} + +/// True if `offset` is inside any proc's argument-declaration braces, +/// at any nesting depth. 
+fn in_proc_args(stmts: &[Stmt], offset: u32) -> bool { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let CommandKind::Proc(proc) = &cmd.kind else { + continue; + }; + if proc.args_span.contains(offset) { + return true; + } + if in_proc_args(&proc.body, offset) { + return true; + } + } + false +} + +fn first_doc_line(docs: &[String]) -> Option { + crate::doc::brief(docs) +} + +fn proc_documentation(p: &ProcInfo<'_>) -> Option { + // Use `extended` (body only) here because the call site populates + // `CompletionItem::detail` with the brief sentence separately — + // shipping the full reflowed text would duplicate that sentence at + // the top of every popup. + let mut out = String::new(); + if let Some(ext) = crate::doc::extended(p.doc_comments) { + out.push_str(&ext); + } + if let Some(sig) = p.signature { + if !sig.args.is_empty() { + if !out.is_empty() { + out.push_str("\n\n"); + } + for arg in &sig.args { + write!(out, "- `-{}`", arg.name).unwrap(); + if let Some(d) = crate::doc::brief(&arg.doc_comments) { + write!(out, " — {d}").unwrap(); + } + out.push('\n'); + } + } + } + (!out.is_empty()).then_some(out) +} + +fn arg_documentation(arg: &ProcArg) -> String { + // `extended` only — the brief sentence is handled by the caller's + // `detail` field; see `proc_documentation` for the rationale. + let mut out = String::new(); + if let Some(ext) = crate::doc::extended(&arg.doc_comments) { + out.push_str(&ext); + } + for attr in &arg.attributes { + if !out.is_empty() { + out.push('\n'); + } + write!(out, "- `@{}`", attr.name).unwrap(); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + /// Build `src` plus a cursor at the `|` marker, returning the + /// marker-free source and the byte offset of the cursor. 
+ fn cursor(src_with_marker: &str) -> (String, u32) { + let offset = src_with_marker.find('|').expect("no cursor marker"); + let src = src_with_marker.replacen('|', "", 1); + (src, offset as u32) + } + + fn labels(src_with_marker: &str) -> Vec { + let (src, off) = cursor(src_with_marker); + let parsed = parse(&src); + complete_at(&parsed.document, &src, off) + .into_iter() + .map(|c| c.label) + .collect() + } + + #[test] + fn proc_names_in_command_position() { + let src = "\ +proc greet {} { }\n\ +proc grumble {} { }\n\ +gr|\n"; + let mut got = labels(src); + got.sort(); + assert_eq!(got, vec!["greet", "grumble"]); + } + + #[test] + fn proc_names_filtered_by_prefix() { + let src = "\ +proc greet {} { }\n\ +proc grumble {} { }\n\ +gree|\n"; + assert_eq!(labels(src), vec!["greet"]); + } + + #[test] + fn flags_in_argument_position() { + let src = "\ +proc cfg {\n width\n depth\n} { }\n\ +cfg |\n"; + let mut got = labels(src); + got.sort(); + assert_eq!(got, vec!["-depth", "-width"]); + } + + #[test] + fn flags_filtered_by_partial() { + let src = "\ +proc cfg {\n width\n depth\n} { }\n\ +cfg -w|\n"; + assert_eq!(labels(src), vec!["-width"]); + } + + #[test] + fn already_used_flag_is_not_reoffered() { + let src = "\ +proc cfg {\n width\n depth\n} { }\n\ +cfg -width 8 |\n"; + assert_eq!(labels(src), vec!["-depth"]); + } + + #[test] + fn completes_call_inside_proc_body() { + let src = "\ +proc helper {} { }\n\ +proc outer {} {\n hel|\n}\n"; + assert_eq!(labels(src), vec!["helper"]); + } + + #[test] + fn no_completion_inside_arg_decls() { + let src = "\ +proc greet {} { }\n\ +proc cfg {\n wi|\n} { }\n"; + assert!(labels(src).is_empty()); + } + + #[test] + fn enum_value_position_offers_choices() { + // `2.5_GT/s` etc. aren't valid `attribute_value_ident`s, so + // the IP generator quotes them in `@enum(…)` — the proc-args + // grammar parses them as strings. The completion labels come + // back bare here because no whitespace requires re-quoting. 
+ let src = "\ +proc cfg {\n @enum(\"2.5_GT/s\", \"5.0_GT/s\", \"8.0_GT/s\") max_link_speed\n} { }\n\ +cfg -max_link_speed |\n"; + let mut got = labels(src); + got.sort(); + assert_eq!(got, vec!["2.5_GT/s", "5.0_GT/s", "8.0_GT/s"]); + } + + #[test] + fn enum_values_filter_by_partial() { + let src = "\ +proc cfg {\n @enum(\"2.5_GT/s\", \"5.0_GT/s\", \"8.0_GT/s\") max_link_speed\n} { }\n\ +cfg -max_link_speed 5|\n"; + assert_eq!(labels(src), vec!["5.0_GT/s"]); + } + + #[test] + fn enum_completion_kind_marks_items() { + let src = "\ +proc cfg {\n @enum(target, controller) kind\n} { }\n\ +cfg -kind |\n"; + let (s, off) = cursor(src); + let parsed = parse(&s); + let items = complete_at(&parsed.document, &s, off); + assert!(items.iter().all(|c| c.kind == CompletionKind::EnumValue)); + } + + #[test] + fn enum_value_with_spaces_gets_quoted() { + let src = "\ +proc cfg {\n @enum(\"Master Mode\", \"Slave Mode\") role\n} { }\n\ +cfg -role |\n"; + let mut got = labels(src); + got.sort(); + assert_eq!(got, vec!["\"Master Mode\"", "\"Slave Mode\""]); + } + + #[test] + fn flag_without_enum_offers_no_completions_at_value_position() { + // For a flag with no `@enum` the user is expected to type a + // free-form value. Popping a flag list there is wrong; it + // gets in the way of the actual value the user is typing. + let src = "\ +proc cfg {\n @default(0) width\n @default(0) depth\n} { }\n\ +cfg -width |\n"; + assert!(labels(src).is_empty(), "{:?}", labels(src)); + } + + #[test] + fn flag_completion_returns_after_value_is_typed() { + // After the value is typed, the cursor is between args again + // — show the next flags. + let src = "\ +proc cfg {\n @default(0) width\n @default(0) depth\n} { }\n\ +cfg -width 8 |\n"; + let mut got = labels(src); + got.sort(); + assert_eq!(got, vec!["-depth"]); + } + + #[test] + fn dash_partial_keeps_flag_completion() { + // Typing `-` after a complete flag should still mean "new + // flag," not "enum value." 
+ let src = "\ +proc cfg {\n @enum(a, b) mode\n @default(0) width\n} { }\n\ +cfg -mode -|\n"; + let got = labels(src); + assert!(got.contains(&"-width".to_string()), "{got:?}"); + assert!(!got.contains(&"a".to_string()), "{got:?}"); + } + + #[test] + fn unknown_command_offers_no_flags() { + let src = "puts |\n"; + assert!(labels(src).is_empty()); + } + + #[test] + fn flag_completion_carries_doc_and_detail() { + // Multi-sentence doc: the brief sentence goes in `detail`, + // the rest goes in `documentation`. They must NOT overlap — + // an LSP client renders both, and a repeated leading + // sentence reads as a duplicate to the user. + let src = "\ +proc cfg { + ## Bus width in bits. Must be a power of two. + @default(8) width +} { } +cfg | +"; + let (s, off) = cursor(src); + let parsed = parse(&s); + let items = complete_at(&parsed.document, &s, off); + let item = items.iter().find(|c| c.label == "-width").unwrap(); + assert_eq!(item.kind, CompletionKind::Flag); + assert_eq!(item.detail.as_deref(), Some("Bus width in bits.")); + let doc = item.documentation.as_deref().unwrap(); + assert!(doc.contains("Must be a power of two."), "{doc}"); + assert!( + !doc.contains("Bus width in bits."), + "documentation should not repeat the brief: {doc}" + ); + assert!(doc.contains("@default"), "{doc}"); + } +} diff --git a/vw-htcl/src/doc.rs b/vw-htcl/src/doc.rs new file mode 100644 index 0000000..f17b817 --- /dev/null +++ b/vw-htcl/src/doc.rs @@ -0,0 +1,285 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Render helpers for doc-comment blocks. +//! +//! `##` doc comments in source are typically wrapped at a comfortable +//! editing column (~80 chars). When a display surface — an LSP hover, +//! signature help, completion documentation — joins those lines +//! 
// verbatim, the source wrap survives into the rendered markdown.
// Most LSP clients then treat the first wrapped fragment as a "brief
// summary," which is almost always a mid-sentence truncation.
//
// [`reflow_doc_comments`] converts a slice of doc-comment lines into
// markdown-clean text: consecutive non-empty lines collapse into one
// paragraph (joined with a single space), and a blank line becomes a
// paragraph break (`\n\n`). The first paragraph then reads as a
// complete unit — usually one or more whole sentences — instead of
// the editor-wrap fragment that surfaces today.

/// Byte index one past the first sentence terminator in `text`.
///
/// A terminator is `.`, `!`, or `?` followed by a space, tab, newline,
/// or end-of-string — so the dots in `3.4` or `xilinx.com` don't end a
/// sentence. Terminators are ASCII, so the returned index is always a
/// char boundary. `None` when `text` has no terminator.
fn first_sentence_end(text: &str) -> Option<usize> {
    let bytes = text.as_bytes();
    bytes.iter().enumerate().find_map(|(i, &b)| {
        if !matches!(b, b'.' | b'!' | b'?') {
            return None;
        }
        match bytes.get(i + 1) {
            None | Some(b' ' | b'\t' | b'\n') => Some(i + 1),
            Some(_) => None,
        }
    })
}

/// One-line summary suitable for an inline annotation (LSP
/// `CompletionItem::detail`, a parameter list's `— brief` suffix,
/// etc.). Takes the first reflowed paragraph and trims to its first
/// sentence — the convention rustdoc, godoc, and most doc generators
/// follow for "short description vs full body."
///
/// Returns `None` when `lines` has no non-blank content. Falls back
/// to the whole first paragraph when no sentence terminator (`.`,
/// `!`, `?` followed by whitespace or end-of-string) is found.
pub fn brief(lines: &[String]) -> Option<String> {
    let reflowed = reflow_doc_comments(lines);
    // A non-empty reflow never starts with a paragraph break, so the
    // first paragraph is empty only when the whole reflow is.
    let first_paragraph =
        reflowed.split("\n\n").next().filter(|p| !p.is_empty())?;
    let end =
        first_sentence_end(first_paragraph).unwrap_or(first_paragraph.len());
    Some(first_paragraph[..end].to_string())
}

/// Extended description — everything **after** what [`brief`]
/// returns, reflowed into markdown.
///
/// Pairs with [`brief`]: an LSP-facing renderer puts `brief` in
/// `CompletionItem::detail` (the inline summary next to the label)
/// and `extended` in `documentation` (the body popup). Splitting
/// this way avoids the duplication that occurs when both fields
/// start with the same sentence.
///
/// The split point is computed on the first paragraph only, exactly
/// as `brief` does, so the two halves never overlap and never drop
/// text: when the first paragraph has no sentence terminator it is
/// the brief in full, and the remaining paragraphs are the body.
///
/// Returns `None` when there is no content after the brief — e.g.
/// when the doc is a single-sentence summary with no body.
pub fn extended(lines: &[String]) -> Option<String> {
    let reflowed = reflow_doc_comments(lines);
    let first_paragraph = reflowed.split("\n\n").next().unwrap_or("");
    let brief_end =
        first_sentence_end(first_paragraph).unwrap_or(first_paragraph.len());
    // `trim_start` eats the separating space or paragraph break.
    let after = reflowed[brief_end..].trim_start();
    (!after.is_empty()).then(|| after.to_string())
}

/// Word-wrap `text` into lines no wider than `width` chars. Used by
/// doc-comment generators that want source files with paragraphs
/// re-flowed to a comfortable editing width (the LSP reflows again
/// for display, but a wrapped source is easier for humans to read
/// and diff).
///
/// Width is measured in `char`s, not bytes, so non-ASCII text wraps
/// at the same column as ASCII text.
///
/// A single word longer than `width` is left on a line by itself
/// rather than truncated.
pub fn wrap_paragraph(text: &str, width: usize) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut current = String::new();
    // Char count of `current`, tracked alongside it so each word is
    // measured once.
    let mut current_width = 0usize;
    for word in text.split_whitespace() {
        let word_width = word.chars().count();
        if current.is_empty() {
            current.push_str(word);
            current_width = word_width;
        } else if current_width + 1 + word_width <= width {
            current.push(' ');
            current.push_str(word);
            current_width += 1 + word_width;
        } else {
            out.push(std::mem::take(&mut current));
            current.push_str(word);
            current_width = word_width;
        }
    }
    if !current.is_empty() {
        out.push(current);
    }
    out
}

/// Reflow doc-comment lines into a markdown string. See module docs.
///
/// Each line is trimmed; consecutive non-blank lines are joined with a
/// single space into one paragraph, and runs of blank lines become a
/// single `\n\n` paragraph break. Leading and trailing blank lines
/// produce no output.
pub fn reflow_doc_comments(lines: &[String]) -> String {
    let mut out = String::new();
    let mut paragraph = String::new();
    // Append the pending paragraph to `out`, preceded by a paragraph
    // break when `out` already has content.
    let flush = |paragraph: &mut String, out: &mut String| {
        if paragraph.is_empty() {
            return;
        }
        if !out.is_empty() {
            out.push_str("\n\n");
        }
        out.push_str(paragraph);
        paragraph.clear();
    };
    for line in lines {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            flush(&mut paragraph, &mut out);
        } else {
            if !paragraph.is_empty() {
                paragraph.push(' ');
            }
            paragraph.push_str(trimmed);
        }
    }
    flush(&mut paragraph, &mut out);
    out
}
+ ) + ); + } + + #[test] + fn brief_handles_single_sentence_proc() { + let out = brief(&lines(["Width of the data bus in bits."])); + assert_eq!(out.as_deref(), Some("Width of the data bus in bits.")); + } + + #[test] + fn brief_falls_back_to_paragraph_when_no_terminator() { + let out = brief(&lines(["just a phrase", "with no period"])); + assert_eq!(out.as_deref(), Some("just a phrase with no period")); + } + + #[test] + fn brief_returns_none_for_empty_input() { + assert!(brief(&[]).is_none()); + assert!(brief(&lines([""])).is_none()); + } + + #[test] + fn extended_skips_the_summary_sentence() { + let out = extended(&lines([ + "Summary. Body sentence in same paragraph.", + "", + "Second paragraph here.", + ])); + assert_eq!( + out.as_deref(), + Some("Body sentence in same paragraph.\n\nSecond paragraph here.") + ); + } + + #[test] + fn extended_returns_none_for_single_sentence_doc() { + assert!(extended(&lines(["Width of the data bus in bits."])).is_none()); + } + + #[test] + fn extended_brief_round_trip_covers_full_text() { + // Together, `brief` and `extended` should reproduce every + // visible character of the reflowed input (modulo a single + // separator between them). + let input = lines([ + "First sentence.", + "Continued first paragraph.", + "", + "Second paragraph.", + ]); + let b = brief(&input).unwrap(); + let e = extended(&input).unwrap(); + let full = reflow_doc_comments(&input); + // The recombined text should equal the reflow (with a space + // between b and e since the brief is part of paragraph 1). + assert!(full.starts_with(&b)); + assert!(full.ends_with(&e)); + } + + #[test] + fn brief_does_not_trip_on_decimal_or_versal_dots() { + // `3.4` shouldn't end the sentence — terminator must be + // followed by whitespace or end-of-string. 
+ let out = + brief(&lines(["Source IP-XACT: xilinx.com:ip:versal_cips:3.4"])); + assert_eq!( + out.as_deref(), + Some("Source IP-XACT: xilinx.com:ip:versal_cips:3.4") + ); + } + + #[test] + fn wrap_paragraph_breaks_at_word_boundaries() { + let out = wrap_paragraph("one two three four five six", 12); + assert_eq!(out, vec!["one two", "three four", "five six"]); + } + + #[test] + fn wrap_paragraph_keeps_oversize_words_on_their_own_line() { + let out = wrap_paragraph("short superlongword end", 8); + assert_eq!(out, vec!["short", "superlongword", "end"]); + } + + #[test] + fn single_leading_space_is_trimmed_per_line() { + // `##` doc comments may include a leading space after the + // `##` marker that gets preserved in the parsed string; we + // trim each line so the leading space doesn't become a + // double-space inside the joined paragraph. + let out = reflow_doc_comments(&lines([" word one", " word two"])); + assert_eq!(out, "word one word two"); + } +} diff --git a/vw-htcl/src/emit.rs b/vw-htcl/src/emit.rs new file mode 100644 index 0000000..c21e264 --- /dev/null +++ b/vw-htcl/src/emit.rs @@ -0,0 +1,553 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Build and emit htcl source code. +//! +//! Distinct from [`crate::ast`], which is the parser's CST and carries +//! spans, doc comments as raw text, and a structure optimized for +//! analysis. `emit` is the dual: for code generation. No spans, +//! ergonomic constructors, and a [`Display`](std::fmt::Display) impl +//! that produces well-formed, indented htcl text. +//! +//! The model is small on purpose. The [`Word`] variants line up with +//! the parser's [`crate::ast::WordForm`] / [`crate::ast::WordPart`] +//! distinctions (bare / quoted / braced / `$var` / `[cmd]`), and +//! [`Word::lit`] picks the safest word form for a runtime string. The +//! 
[`ToHtcl`] trait is the interpolation interface used by `vw-quote`'s +//! `quote_htcl!` macro and by hand-written generators. + +use std::fmt; + +/// A complete htcl document being built. +#[derive(Clone, Debug, Default)] +pub struct Doc { + pub items: Vec, +} + +impl Doc { + pub fn new() -> Self { + Self::default() + } + + pub fn push(&mut self, item: impl Into) -> &mut Self { + self.items.push(item.into()); + self + } + + pub fn cmd(&mut self, cmd: Command) -> &mut Self { + self.items.push(Item::Command(cmd)); + self + } + + pub fn comment(&mut self, text: impl Into) -> &mut Self { + self.items.push(Item::Comment(text.into())); + self + } + + pub fn doc(&mut self, text: impl Into) -> &mut Self { + self.items.push(Item::DocComment(text.into())); + self + } + + pub fn blank(&mut self) -> &mut Self { + self.items.push(Item::Blank); + self + } +} + +#[derive(Clone, Debug)] +pub enum Item { + Command(Command), + /// Regular `# ...` comment (one line, no leading `#`). + Comment(String), + /// Doc `## ...` comment (one line, no leading `##`). Doc comments + /// attached to a specific command live on [`Command::doc_comments`]. + DocComment(String), + /// Emit a blank line. + Blank, +} + +impl From for Item { + fn from(c: Command) -> Self { + Item::Command(c) + } +} + +/// A single htcl command (one logical line, possibly with a body +/// block). +#[derive(Clone, Debug, Default)] +pub struct Command { + /// `##` doc comments emitted immediately above the command. + pub doc_comments: Vec, + /// The command name and its arguments, in order. + pub words: Vec, + /// Optional braced body emitted as `{ … }` after the words, with + /// its contents indented. Used by `proc`, `if`, `while`, etc. + pub body: Option, +} + +impl Command { + /// `name arg1 arg2 …` with no body. Most generic command shape. 
+    ///
+    /// `name` and every element of `args` are converted with `Into<Word>`;
+    /// string arguments therefore go through [`Word::lit`] and are quoted
+    /// only when they need to be.
+    pub fn call<I, W>(name: impl Into<Word>, args: I) -> Self
+    where
+        I: IntoIterator<Item = W>,
+        W: Into<Word>,
+    {
+        let mut words = vec![name.into()];
+        words.extend(args.into_iter().map(Into::into));
+        Self {
+            words,
+            ..Self::default()
+        }
+    }
+
+    /// Append one `##` doc-comment line (without the leading `##`) to be
+    /// emitted above the command. Builder-style: consumes and returns `self`.
+    pub fn with_doc(mut self, doc: impl Into<String>) -> Self {
+        self.doc_comments.push(doc.into());
+        self
+    }
+
+    /// Attach a braced body, replacing any body set earlier.
+    pub fn with_body(mut self, body: Doc) -> Self {
+        self.body = Some(body);
+        self
+    }
+}
+
+/// One word of an htcl command.
+///
+/// The variants correspond to the parser's word forms. Prefer
+/// [`Word::lit`] when you have a runtime string and want the safest
+/// form chosen for you; the named constructors are for when you know
+/// the form (e.g. you're producing a `$var` reference deliberately).
+#[derive(Clone, Debug)]
+pub enum Word {
+    /// A bare unquoted word, emitted verbatim. Caller is responsible
+    /// for ensuring `s` contains no whitespace or Tcl-special
+    /// characters; prefer [`Word::lit`] when in doubt.
+    Bare(String),
+    /// A double-quoted word (`"…"`) holding *literal* text. During emit
+    /// `\`, `"`, `$`, `[` and `]` are backslash-escaped, so no variable
+    /// or command substitution takes place inside the quotes. Use
+    /// [`Word::Var`], [`Word::CmdSubst`] or [`Word::Raw`] when a
+    /// substitution is intended.
+    Quoted(String),
+    /// A braced word (`{…}`). No substitution; the content is emitted
+    /// verbatim, so balancing embedded `{`/`}` is the caller's
+    /// responsibility (typically rare).
+    Braced(String),
+    /// A `$name` variable reference.
+    Var(String),
+    /// A `[ cmd ]` command substitution; `s` is the interior text,
+    /// emitted verbatim.
+    CmdSubst(String),
+    /// Pre-formatted text inserted as-is. Caller is responsible for
+    /// it being a valid single word. Useful when composing fragments
+    /// produced elsewhere.
+    Raw(String),
+}
+
+impl Word {
+    /// Choose the smallest safe word form for `s`: bare when `s` is
+    /// non-empty and contains neither whitespace nor any of
+    /// `; " \ [ ] { } $ #`, double-quoted (and escaped on emit)
+    /// otherwise. Empty strings become `""`.
+    pub fn lit(s: impl Into<String>) -> Word {
+        let s = s.into();
+        if needs_quoting(&s) {
+            Word::Quoted(s)
+        } else {
+            Word::Bare(s)
+        }
+    }
+
+    /// `$name` reference. The name is not validated.
+    pub fn var(name: impl Into<String>) -> Word {
+        Word::Var(name.into())
+    }
+}
+
+/// Whether `s` must be double-quoted to survive as a single word.
+///
+/// True for the empty string and for any string containing whitespace
+/// or one of `; " \ [ ] { } $ #`. Every other string — including
+/// non-ASCII text — is considered safe to emit bare.
+fn needs_quoting(s: &str) -> bool {
+    if s.is_empty() {
+        return true;
+    }
+    s.chars().any(|c| {
+        c.is_whitespace()
+            || matches!(c, ';' | '"' | '\\' | '[' | ']' | '{' | '}' | '$' | '#')
+    })
+}
+
+impl From<&str> for Word {
+    fn from(s: &str) -> Self {
+        Word::lit(s)
+    }
+}
+
+impl From<String> for Word {
+    fn from(s: String) -> Self {
+        Word::lit(s)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// ToHtcl — the interpolation interface for `quote_htcl!`.
+// ---------------------------------------------------------------------------
+
+/// Produce a [`Word`] for interpolation into emitted htcl.
+///
+/// Implemented for the common Rust value types. Pass any `T: ToHtcl`
+/// to `#expr` slots in `quote_htcl!`; the macro calls
+/// `(&expr).to_htcl()` to get the inserted word.
+pub trait ToHtcl {
+    fn to_htcl(&self) -> Word;
+}
+
+impl ToHtcl for Word {
+    fn to_htcl(&self) -> Word {
+        self.clone()
+    }
+}
+impl ToHtcl for str {
+    fn to_htcl(&self) -> Word {
+        Word::lit(self)
+    }
+}
+impl ToHtcl for String {
+    fn to_htcl(&self) -> Word {
+        Word::lit(self.clone())
+    }
+}
+// `?Sized` so that `&str` (a reference to the unsized `str`) is covered,
+// which is what `(&expr).to_htcl()` sees for string-slice expressions.
+impl<T: ToHtcl + ?Sized> ToHtcl for &T {
+    fn to_htcl(&self) -> Word {
+        (*self).to_htcl()
+    }
+}
+impl ToHtcl for bool {
+    /// Booleans are emitted as the bare words `1` / `0`.
+    fn to_htcl(&self) -> Word {
+        Word::Bare(if *self { "1".into() } else { "0".into() })
+    }
+}
+
+/// Implement [`ToHtcl`] for types whose `Display` output is already a
+/// safe bare word (the numeric primitives below).
+macro_rules! impl_to_htcl_display {
+    ($($t:ty),* $(,)?) => {
+        $(
+            impl ToHtcl for $t {
+                fn to_htcl(&self) -> Word {
+                    Word::Bare(self.to_string())
+                }
+            }
+        )*
+    };
+}
+impl_to_htcl_display!(i8, i16, i32, i64, i128, isize);
+impl_to_htcl_display!(u8, u16, u32, u64, u128, usize);
+impl_to_htcl_display!(f32, f64);
+
+// ---------------------------------------------------------------------------
+// ToTcl — interpolation interface for `quote_tcl!`.
+// ---------------------------------------------------------------------------
+
+/// Produce a [`Word`] for interpolation into emitted *pure Tcl*.
+///
+/// Distinct from [`ToHtcl`] so that compiler intrinsics (the `repr`
+/// codegen module, `kwargs` shim helpers, future ones) which emit
+/// Tcl bodies — not htcl — can carry an independent vocabulary if
+/// they grow it. For now the surface is intentionally identical:
+/// the same Rust value types yield the same [`Word`] under both
+/// traits. The split exists so future Tcl-only forms (typed
+/// `Tcl_Obj` handle quoting, namespaced-proc-name formatting,
+/// etc.) can land on `ToTcl` without changing `ToHtcl`'s contract.
+pub trait ToTcl {
+    fn to_tcl(&self) -> Word;
+}
+
+impl ToTcl for Word {
+    fn to_tcl(&self) -> Word {
+        self.clone()
+    }
+}
+impl ToTcl for str {
+    fn to_tcl(&self) -> Word {
+        Word::lit(self)
+    }
+}
+impl ToTcl for String {
+    fn to_tcl(&self) -> Word {
+        Word::lit(self.clone())
+    }
+}
+// `?Sized` for the same reason as the `ToHtcl` impl: `&str` must qualify.
+impl<T: ToTcl + ?Sized> ToTcl for &T {
+    fn to_tcl(&self) -> Word {
+        (*self).to_tcl()
+    }
+}
+impl ToTcl for bool {
+    /// Booleans are emitted as the bare words `1` / `0`.
+    fn to_tcl(&self) -> Word {
+        Word::Bare(if *self { "1".into() } else { "0".into() })
+    }
+}
+
+/// Implement [`ToTcl`] for types whose `Display` output is already a
+/// safe bare word (the numeric primitives below).
+macro_rules! impl_to_tcl_display {
+    ($($t:ty),* $(,)?) => {
+        $(
+            impl ToTcl for $t {
+                fn to_tcl(&self) -> Word {
+                    Word::Bare(self.to_string())
+                }
+            }
+        )*
+    };
+}
+impl_to_tcl_display!(i8, i16, i32, i64, i128, isize);
+impl_to_tcl_display!(u8, u16, u32, u64, u128, usize);
+impl_to_tcl_display!(f32, f64);
+
+// ---------------------------------------------------------------------------
+// Emit — Display impls produce well-formed htcl text.
+// --------------------------------------------------------------------------- + +impl fmt::Display for Doc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + emit_doc(f, self, 0) + } +} + +impl fmt::Display for Item { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + emit_item(f, self, 0) + } +} + +impl fmt::Display for Command { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + emit_command(f, self, 0) + } +} + +impl fmt::Display for Word { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + emit_word(f, self) + } +} + +const INDENT: &str = " "; + +fn emit_indent(f: &mut fmt::Formatter<'_>, level: usize) -> fmt::Result { + for _ in 0..level { + f.write_str(INDENT)?; + } + Ok(()) +} + +fn emit_doc( + f: &mut fmt::Formatter<'_>, + doc: &Doc, + level: usize, +) -> fmt::Result { + for item in &doc.items { + emit_item(f, item, level)?; + } + Ok(()) +} + +fn emit_item( + f: &mut fmt::Formatter<'_>, + item: &Item, + level: usize, +) -> fmt::Result { + match item { + Item::Command(c) => emit_command(f, c, level), + Item::Comment(text) => { + emit_indent(f, level)?; + writeln!(f, "# {text}") + } + Item::DocComment(text) => { + emit_indent(f, level)?; + writeln!(f, "## {text}") + } + Item::Blank => writeln!(f), + } +} + +fn emit_command( + f: &mut fmt::Formatter<'_>, + cmd: &Command, + level: usize, +) -> fmt::Result { + for doc in &cmd.doc_comments { + emit_indent(f, level)?; + writeln!(f, "## {doc}")?; + } + emit_indent(f, level)?; + let mut first = true; + for w in &cmd.words { + if !first { + f.write_str(" ")?; + } + emit_word(f, w)?; + first = false; + } + if let Some(body) = &cmd.body { + if body.items.is_empty() { + f.write_str(" {}\n")?; + } else { + f.write_str(" {\n")?; + emit_doc(f, body, level + 1)?; + emit_indent(f, level)?; + f.write_str("}\n")?; + } + } else { + f.write_str("\n")?; + } + Ok(()) +} + +fn emit_word(f: &mut fmt::Formatter<'_>, w: &Word) -> fmt::Result { + match w { + Word::Bare(s) => 
f.write_str(s), + Word::Quoted(s) => { + f.write_str("\"")?; + for c in s.chars() { + match c { + '\\' => f.write_str("\\\\")?, + '"' => f.write_str("\\\"")?, + '$' => f.write_str("\\$")?, + '[' => f.write_str("\\[")?, + ']' => f.write_str("\\]")?, + other => f.write_fmt(format_args!("{other}"))?, + } + } + f.write_str("\"") + } + Word::Braced(s) => { + f.write_str("{")?; + f.write_str(s)?; + f.write_str("}") + } + Word::Var(name) => { + f.write_str("$")?; + f.write_str(name) + } + Word::CmdSubst(s) => { + f.write_str("[")?; + f.write_str(s)?; + f.write_str("]") + } + Word::Raw(s) => f.write_str(s), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn word_lit_picks_bare_when_safe() { + assert!( + matches!(Word::lit("hello"), Word::Bare(ref s) if s == "hello") + ); + assert!(matches!(Word::lit("32.0_GT/s"), Word::Bare(_))); + } + + #[test] + fn word_lit_quotes_when_special() { + let cases = ["with space", "has\"quote", "has$dollar", "has;semi", ""]; + for c in cases { + assert!( + matches!(Word::lit(c), Word::Quoted(_)), + "expected quoted for {c:?}" + ); + } + } + + #[test] + fn emit_command_simple() { + let cmd = Command::call("puts", ["hi"]); + assert_eq!(format!("{cmd}"), "puts hi\n"); + } + + #[test] + fn emit_command_quotes_when_needed() { + let cmd = Command::call("puts", ["hello world"]); + assert_eq!(format!("{cmd}"), "puts \"hello world\"\n"); + } + + #[test] + fn emit_doc_full_proc() { + // proc greet {name} { puts "hi $name" } + let inner = Command::call("puts", [Word::Quoted("hi $name".into())]); + let body = { + let mut d = Doc::new(); + d.cmd(inner); + d + }; + let proc = Command { + doc_comments: vec!["Say hi.".into()], + words: vec![ + Word::Bare("proc".into()), + Word::Bare("greet".into()), + Word::Braced("name".into()), + ], + body: Some(body), + }; + let mut doc = Doc::new(); + doc.cmd(proc); + let out = format!("{doc}"); + let expected = "\ +## Say hi. 
+proc greet {name} { + puts \"hi \\$name\" +} +"; + assert_eq!(out, expected); + } + + #[test] + fn empty_body_emits_braces() { + let cmd = Command { + words: vec![ + Word::Bare("proc".into()), + Word::Bare("f".into()), + Word::Braced("".into()), + ], + body: Some(Doc::new()), + ..Default::default() + }; + assert_eq!(format!("{cmd}"), "proc f {} {}\n"); + } + + #[test] + fn to_htcl_basic_types() { + assert!(matches!("hi".to_htcl(), Word::Bare(ref s) if s == "hi")); + assert!(matches!(42i64.to_htcl(), Word::Bare(ref s) if s == "42")); + assert!(matches!(true.to_htcl(), Word::Bare(ref s) if s == "1")); + } + + #[test] + fn emitted_output_round_trips_through_parser() { + // Build a doc, emit it, re-parse, and check we get a structurally + // similar document — proves the emitter is producing well-formed + // htcl that the parser accepts. + use crate::parser::parse; + let mut body = Doc::new(); + body.cmd(Command::call("puts", [Word::Quoted("hi $name".into())])); + let proc = Command { + words: vec![ + Word::Bare("proc".into()), + Word::Bare("greet".into()), + Word::Braced("name".into()), + ], + body: Some(body), + ..Default::default() + }; + let mut doc = Doc::new(); + doc.cmd(proc); + let text = doc.to_string(); + let parsed = parse(&text); + assert!(parsed.errors.is_empty(), "{:?}", parsed.errors); + // First (and only) statement should be the proc. + let stmt = &parsed.document.stmts[0]; + let crate::ast::Stmt::Command(cmd) = stmt else { + panic!("expected command, got {stmt:?}"); + }; + let crate::ast::CommandKind::Proc(p) = &cmd.kind else { + panic!("expected proc"); + }; + assert_eq!(p.name.as_deref(), Some("greet")); + } +} diff --git a/vw-htcl/src/enum_parse.rs b/vw-htcl/src/enum_parse.rs new file mode 100644 index 0000000..5938c04 --- /dev/null +++ b/vw-htcl/src/enum_parse.rs @@ -0,0 +1,318 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Mini-parser for the variants block of an `enum NAME = { ... }` +//! declaration. +//! +//! Grammar (operates on the text INSIDE the body braces, not +//! including the braces themselves): +//! +//! ```text +//! Variants ::= Sep* (Variant Sep+ Variant)* Sep* +//! Variant ::= Ident (':' Type)? +//! Sep ::= '\n' | comment | doc_comment | whitespace +//! ``` +//! +//! Variants are newline-separated (mirroring `proc {a; b}` arg-list +//! style); blank lines and `##` doc comments are ignored. The payload +//! type, when present, is parsed via [`crate::type_parse`] verbatim +//! — so anything that grammar accepts (primitives, newtypes, +//! generics, qualified) is legal here too. A future tightening could +//! reject `Qualified` payloads as nonsensical; v1 keeps it permissive +//! and lets the validator decide. + +use crate::ast::EnumVariant; +use crate::span::Span; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct EnumParseError { + pub message: String, + pub span: Span, +} + +/// Parse the body of an enum declaration. `text` is the contents +/// INSIDE the body braces (not including the braces). `base_offset` +/// is the absolute byte position of `text[0]` in the original +/// source so returned spans land in the right place. 
+pub fn parse(
+    text: &str,
+    base_offset: u32,
+) -> Result<Vec<EnumVariant>, EnumParseError> {
+    let mut p = Parser::new(text, base_offset);
+    let mut variants = Vec::new();
+    loop {
+        // Separators (whitespace, newlines, comments) may precede every
+        // variant and may trail the last one.
+        p.skip_separators();
+        if p.eof() {
+            break;
+        }
+        // The first malformed variant aborts the whole parse.
+        variants.push(p.parse_variant()?);
+    }
+    Ok(variants)
+}
+
+/// Byte-oriented cursor over the enum body text.
+struct Parser<'a> {
+    /// The body text, kept for slicing out names and payloads.
+    text: &'a str,
+    /// `text` as bytes; all scanning is done on this view.
+    bytes: &'a [u8],
+    /// Current byte offset into `text`.
+    pos: usize,
+    /// Absolute offset of `text[0]` in the original source.
+    base: u32,
+}
+
+impl<'a> Parser<'a> {
+    fn new(text: &'a str, base: u32) -> Self {
+        Self {
+            text,
+            bytes: text.as_bytes(),
+            pos: 0,
+            base,
+        }
+    }
+
+    /// True once the cursor has consumed the whole body.
+    fn eof(&self) -> bool {
+        self.pos >= self.bytes.len()
+    }
+
+    /// Absolute source offset of the cursor.
+    ///
+    /// NOTE(review): `pos as u32` truncates silently if the body is
+    /// larger than `u32::MAX` bytes; spans are `u32`-based throughout,
+    /// so this presumably mirrors a file-size limit enforced elsewhere.
+    fn here(&self) -> u32 {
+        self.base + self.pos as u32
+    }
+
+    /// Zero-width span at the cursor, used for error reporting.
+    fn here_span(&self) -> Span {
+        let h = self.here();
+        Span::new(h, h)
+    }
+
+    /// Absolute span from the body-relative offset `start` to the cursor.
+    fn span_from(&self, start: usize) -> Span {
+        Span::new(self.base + start as u32, self.base + self.pos as u32)
+    }
+
+    /// Skip spaces, tabs and carriage returns, but not newlines.
+    fn skip_horizontal_ws(&mut self) {
+        while self.pos < self.bytes.len() {
+            let c = self.bytes[self.pos];
+            if c == b' ' || c == b'\t' || c == b'\r' {
+                self.pos += 1;
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Skip newlines, whitespace, regular `#` comments, and `##`
+    /// doc comments. Variants are separated by at least one newline
+    /// (or are at the start of the body).
+    fn skip_separators(&mut self) {
+        loop {
+            // Any whitespace, including newlines.
+            while self.pos < self.bytes.len()
+                && self.bytes[self.pos].is_ascii_whitespace()
+            {
+                self.pos += 1;
+            }
+            if self.eof() {
+                break;
+            }
+            // Comment line — consume to next newline. `##` doc
+            // comments are dropped here; if a future revision needs
+            // to attach docs to variants, this is the spot.
+            if self.bytes[self.pos] == b'#' {
+                while self.pos < self.bytes.len()
+                    && self.bytes[self.pos] != b'\n'
+                {
+                    self.pos += 1;
+                }
+                // Loop again: more blank lines or comments may follow.
+                continue;
+            }
+            break;
+        }
+    }
+
+    /// Variant := IDENT (':' TYPE)?
+ fn parse_variant(&mut self) -> Result { + let start = self.pos; + let (name, name_span) = self.parse_ident()?; + self.skip_horizontal_ws(); + let payload_pos = self.pos; + let (payload, payload_span) = if self.pos < self.bytes.len() + && self.bytes[self.pos] == b':' + { + self.pos += 1; // ':' + self.skip_horizontal_ws(); + let type_start = self.pos; + // Consume up to end-of-line or end-of-input. The + // type-text-extraction window stops at newline so a + // bad payload doesn't bleed into the next variant. + while self.pos < self.bytes.len() && self.bytes[self.pos] != b'\n' { + self.pos += 1; + } + // Trim trailing horizontal whitespace from the + // payload text so spans are tight. + let mut end = self.pos; + while end > type_start + && matches!(self.bytes[end - 1], b' ' | b'\t' | b'\r') + { + end -= 1; + } + let payload_text = &self.text[type_start..end]; + let span = Span::new( + self.base + type_start as u32, + self.base + end as u32, + ); + let ty = crate::type_parse::parse( + payload_text, + self.base + type_start as u32, + ) + .map_err(|e| EnumParseError { + message: e.message, + span: e.span, + })?; + (Some(ty), span) + } else { + // Empty-payload variant. Span is a zero-width point + // right after the name. 
+ let here = Span::new( + self.base + payload_pos as u32, + self.base + payload_pos as u32, + ); + (None, here) + }; + Ok(EnumVariant { + name, + name_span, + payload, + payload_span, + span: self.span_from(start), + }) + } + + fn parse_ident(&mut self) -> Result<(String, Span), EnumParseError> { + self.skip_horizontal_ws(); + let start = self.pos; + if self.eof() { + return Err(EnumParseError { + message: "expected variant name, found end of body".into(), + span: self.here_span(), + }); + } + let first = self.bytes[self.pos]; + if !(first.is_ascii_alphabetic() || first == b'_') { + return Err(EnumParseError { + message: format!( + "expected variant name, found `{}`", + first as char + ), + span: self.here_span(), + }); + } + self.pos += 1; + while self.pos < self.bytes.len() { + let c = self.bytes[self.pos]; + if c.is_ascii_alphanumeric() || c == b'_' { + self.pos += 1; + } else { + break; + } + } + let name = self.text[start..self.pos].to_string(); + Ok((name, self.span_from(start))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::TypeExpr; + + fn p(s: &str) -> Vec { + parse(s, 0).unwrap_or_else(|e| panic!("parse failed: {e:?}")) + } + + #[test] + fn empty_body() { + let v = p(""); + assert!(v.is_empty()); + let v = p(" \n\n "); + assert!(v.is_empty()); + } + + #[test] + fn single_variant_with_payload() { + let v = p("Scalar: string"); + assert_eq!(v.len(), 1); + assert_eq!(v[0].name, "Scalar"); + let ty = v[0].payload.as_ref().unwrap(); + assert_eq!(ty.name(), "string"); + } + + #[test] + fn single_empty_payload_variant() { + let v = p("North"); + assert_eq!(v.len(), 1); + assert_eq!(v[0].name, "North"); + assert!(v[0].payload.is_none()); + } + + #[test] + fn mixed_payload_and_empty() { + let v = p("\n North\n South: int\n East\n West\n"); + assert_eq!(v.len(), 4); + assert_eq!(v[0].name, "North"); + assert!(v[0].payload.is_none()); + assert_eq!(v[1].name, "South"); + assert_eq!(v[1].payload.as_ref().unwrap().name(), "int"); + 
assert_eq!(v[2].name, "East"); + assert!(v[2].payload.is_none()); + assert_eq!(v[3].name, "West"); + assert!(v[3].payload.is_none()); + } + + #[test] + fn generic_payload() { + let v = p("\n Scalar: string\n Nested: dict\n"); + assert_eq!(v.len(), 2); + let TypeExpr::Generic { name, args, .. } = + v[1].payload.as_ref().unwrap() + else { + panic!() + }; + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + assert_eq!(args[0].name(), "string"); + assert_eq!(args[1].name(), "Property"); + } + + #[test] + fn comments_skipped_between_variants() { + let v = p( + "\n# leading comment\n## doc comment\nScalar: string\n# trailing\nNested: int\n", + ); + assert_eq!(v.len(), 2); + assert_eq!(v[0].name, "Scalar"); + assert_eq!(v[1].name, "Nested"); + } + + #[test] + fn err_invalid_first_char() { + let e = parse("123Foo: int", 0).unwrap_err(); + assert!(e.message.contains("variant name")); + } + + #[test] + fn err_bad_payload_type() { + let e = parse("Scalar: Option { + let table = signature_table(document); + definition_in_stmts(&document.stmts, None, document, &table, offset) + // Fallback: a `$var` the structured tree keeps opaque — inside + // a command substitution or an `if`/`while` condition. Found by + // scanning the source and resolving against the enclosing + // proc's scope. + .or_else(|| definition_of_scanned_var(document, source, offset)) +} + +fn definition_of_scanned_var( + document: &Document, + source: &str, + offset: u32, +) -> Option { + let (name, _) = scan_var_ref(source, offset)?; + let (stmts, enclosing) = innermost_scope(document, offset); + resolve_var_def(&name, stmts, enclosing, offset).map(|d| d.def_span()) +} + +/// Resolve the definition at `offset` within `stmts`, descending into +/// proc bodies. `enclosing` is the proc whose body `stmts` belongs to +/// (`None` at the top level), used to resolve variables to parameters. 
+/// `document` is the whole document so call sites — at any nesting
+/// depth — can find their declaring proc, which is looked up from the
+/// top level (descending into `namespace eval` bodies).
+fn definition_in_stmts<'a>(
+    stmts: &'a [Stmt],
+    enclosing: Option<&'a Proc>,
+    document: &'a Document,
+    table: &SignatureTable<'a>,
+    offset: u32,
+) -> Option<Span> {
+    for stmt in stmts {
+        let Stmt::Command(cmd) = stmt else { continue };
+        if !cmd.span.contains(offset) {
+            continue;
+        }
+
+        // Inside a proc declaration, attribute ident values can
+        // reference sibling args by name. Resolve those to the arg's
+        // declaration site.
+        if let CommandKind::Proc(proc) = &cmd.kind {
+            if let Some(span) = definition_in_proc_decl(proc, offset) {
+                return Some(span);
+            }
+            // Cursor on the proc's own name — "goto def" of the def
+            // itself is the same span. Not super useful but
+            // consistent.
+            if proc.name_span.contains(offset) {
+                return Some(proc.name_span);
+            }
+            // Otherwise the cursor is somewhere in the body: recurse,
+            // making this proc the enclosing scope.
+            return definition_in_stmts(
+                &proc.body,
+                Some(proc),
+                document,
+                table,
+                offset,
+            );
+        }
+
+        // Cursor on a `$var` reference → its definition in scope.
+        if let Some(span) = definition_of_var(cmd, stmts, enclosing, offset) {
+            return Some(span);
+        }
+
+        // Generic call site. Two flavors:
+        //   1. Cursor on the call name → proc declaration.
+        //   2. Cursor on a `-flag` arg → that arg's decl in the proc.
+        if let Some(span) = definition_in_call(cmd, document, table, offset) {
+            return Some(span);
+        }
+
+        // Cursor inside a `[ … ]` command substitution → recurse into
+        // its parsed body so goto works on calls written inline.
+        if let Some(span) =
+            definition_in_cmd_substs(cmd, document, table, offset)
+        {
+            return Some(span);
+        }
+    }
+    None
+}
+
+/// If the cursor is inside a `[ … ]` command substitution of one of
+/// `cmd`'s words, resolve the definition within that substitution's
+/// parsed body.
+fn definition_in_cmd_substs<'a>(
+    cmd: &'a Command,
+    document: &'a Document,
+    table: &SignatureTable<'a>,
+    offset: u32,
+) -> Option<Span> {
+    for word in &cmd.words {
+        if !word.span.contains(offset) {
+            continue;
+        }
+        for part in &word.parts {
+            if let WordPart::CmdSubst { span, body, .. } = part {
+                if span.contains(offset) {
+                    // NOTE(review): the enclosing proc is not passed
+                    // down, so `$param` references are not resolved on
+                    // this path; the source-scan fallback in
+                    // `definition_at` appears to cover that case.
+                    return definition_in_stmts(
+                        body, None, document, table, offset,
+                    );
+                }
+            }
+        }
+    }
+    None
+}
+
+/// If the cursor is on a `$var` reference (a real [`WordPart::VarRef`])
+/// in `cmd`, resolve it to its definition within `scope_stmts` or a
+/// parameter of `enclosing`.
+fn definition_of_var<'a>(
+    cmd: &'a Command,
+    scope_stmts: &'a [Stmt],
+    enclosing: Option<&'a Proc>,
+    offset: u32,
+) -> Option<Span> {
+    let name = var_ref_at(cmd, offset)?;
+    resolve_var_def(name, scope_stmts, enclosing, offset).map(|d| d.def_span())
+}
+
+/// The name of the `$var` reference under the cursor, if any. Walks
+/// word parts so it also fires inside quoted words (`"hi $name"`) and
+/// array syntax (`$arr($idx)`).
+fn var_ref_at(cmd: &Command, offset: u32) -> Option<&str> {
+    for word in &cmd.words {
+        if !word.span.contains(offset) {
+            continue;
+        }
+        for part in &word.parts {
+            if let WordPart::VarRef { name, span } = part {
+                if span.contains(offset) {
+                    return Some(name.as_str());
+                }
+            }
+        }
+    }
+    None
+}
+
+/// Resolve a cursor position inside a generic call: the call name maps
+/// to the proc declaration's name, a `-flag` word maps to the matching
+/// argument's name in the called proc's signature.
+fn definition_in_call<'a>(
+    cmd: &'a Command,
+    document: &'a Document,
+    table: &SignatureTable<'a>,
+    offset: u32,
+) -> Option<Span> {
+    let first = cmd.words.first()?;
+    let name = first.as_text()?;
+
+    // Cursor on the call name.
+    if first.span.contains(offset) {
+        let proc = find_proc_decl(document, name)?;
+        return Some(proc.name_span);
+    }
+
+    // Cursor on one of the `-flag` words. Look the flag up in the
+    // called proc's signature and return that arg's name_span.
+    let sig = *table.get(name)?;
+    for word in cmd.words.iter().skip(1) {
+        if !word.span.contains(offset) {
+            continue;
+        }
+        let text = word.as_text()?;
+        let flag = text.strip_prefix('-')?;
+        let arg = sig.find(flag)?;
+        return Some(arg.name_span);
+    }
+
+    None
+}
+
+/// Find the `proc` declaration that registers under `name` in the
+/// document's signature table. Walks `namespace eval` bodies
+/// recursively so a call to `project::set_target_language` resolves
+/// to the inner `proc set_target_language` inside
+/// `namespace eval project { … }`.
+fn find_proc_decl<'a>(document: &'a Document, name: &str) -> Option<&'a Proc> {
+    find_proc_decl_in(&document.stmts, "", name)
+}
+
+/// Search `stmts` (the body of namespace `prefix`, or the top level
+/// when `prefix` is empty) for the proc whose qualified name is `name`.
+fn find_proc_decl_in<'a>(
+    stmts: &'a [Stmt],
+    prefix: &str,
+    name: &str,
+) -> Option<&'a Proc> {
+    // `name` relative to this namespace: what a proc declared directly
+    // in `stmts` must be called to match. `None` when `name` does not
+    // start with `prefix::`. Comparing against this avoids building a
+    // qualified `String` for every candidate proc.
+    let local_name = if prefix.is_empty() {
+        Some(name)
+    } else {
+        name.strip_prefix(prefix)
+            .and_then(|rest| rest.strip_prefix("::"))
+    };
+    for stmt in stmts {
+        let Stmt::Command(cmd) = stmt else { continue };
+        match &cmd.kind {
+            CommandKind::Proc(proc) => {
+                let Some(decl_name) = proc.name.as_deref() else {
+                    continue;
+                };
+                if local_name == Some(decl_name) {
+                    return Some(proc);
+                }
+            }
+            CommandKind::NamespaceEval(ns) => {
+                let Some(ns_name) = ns.name.as_deref() else {
+                    continue;
+                };
+                let nested = if prefix.is_empty() {
+                    ns_name.to_string()
+                } else {
+                    format!("{prefix}::{ns_name}")
+                };
+                if let Some(found) = find_proc_decl_in(&ns.body, &nested, name)
+                {
+                    return Some(found);
+                }
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// If the cursor is on an identifier value inside one of `proc`'s
+/// argument attributes, resolve it to the sibling argument it names.
+fn definition_in_proc_decl(proc: &Proc, offset: u32) -> Option<Span> {
+    let sig = proc.signature.as_ref()?;
+    for arg in &sig.args {
+        for attr in &arg.attributes {
+            for value in &attr.values {
+                let AttributeValue::Ident { value: name, span } = value else {
+                    continue;
+                };
+                if !span.contains(offset) {
+                    continue;
+                }
+                if let Some(target) = find_sibling_arg(sig, name) {
+                    return Some(target.name_span);
+                }
+                // Ident value naming an unknown arg — no definition.
+ return None; + } + } + } + None +} + +fn find_sibling_arg<'a>( + sig: &'a ProcSignature, + name: &str, +) -> Option<&'a ProcArg> { + sig.args.iter().find(|a| a.name == name) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + fn first(src: &str, needle: &str) -> u32 { + src.find(needle).expect("needle not found") as u32 + } + + fn nth(src: &str, needle: &str, n: usize) -> u32 { + let mut start = 0; + for i in 0..=n { + let pos = src[start..] + .find(needle) + .map(|p| start + p) + .expect("needle not found enough times"); + if i == n { + return pos as u32; + } + start = pos + needle.len(); + } + unreachable!() + } + + #[test] + fn call_to_proc_decl() { + let src = "\ +proc greet {\n name\n} { puts hi }\n\ +greet -name there\n"; + let parsed = parse(src); + // Cursor on the `g` of the call-site `greet`. + let pos = first(src, "greet -"); + let target = definition_at(&parsed.document, src, pos).unwrap(); + // Should point at the `greet` in the proc declaration (first + // occurrence after `proc `). + let decl_span = parsed + .document + .stmts + .iter() + .find_map(|s| match s { + Stmt::Command(c) => match &c.kind { + CommandKind::Proc(p) + if p.name.as_deref() == Some("greet") => + { + Some(p.name_span) + } + _ => None, + }, + _ => None, + }) + .unwrap(); + assert_eq!(target, decl_span); + } + + #[test] + fn attribute_ident_to_sibling_arg() { + let src = "\ +proc f {\n has_a\n @requires(has_a) has_b\n} { }\n"; + let parsed = parse(src); + // Cursor on `has_a` inside `@requires(has_a)`. + // First occurrence is the declaration; second is the + // attribute argument. 
+ let pos = nth(src, "has_a", 1); + let target = definition_at(&parsed.document, src, pos).unwrap(); + let decl_pos = first(src, "has_a"); + assert_eq!(target.start, decl_pos); + } + + #[test] + fn call_to_unknown_proc_returns_none() { + let src = "puts hello\n"; + let parsed = parse(src); + assert!( + definition_at(&parsed.document, src, first(src, "puts")).is_none() + ); + } + + #[test] + fn attribute_ident_to_unknown_arg_returns_none() { + let src = "proc f {\n @requires(typo) only\n} { }\n"; + let parsed = parse(src); + let pos = first(src, "typo"); + assert!(definition_at(&parsed.document, src, pos).is_none()); + } + + #[test] + fn call_flag_to_arg_decl() { + let src = "\ +proc show {\n flag_a\n width\n} { }\n\ +show -width 16\n"; + let parsed = parse(src); + // Cursor on `-width` at the call site. + let pos = first(src, "-width"); + let target = definition_at(&parsed.document, src, pos).unwrap(); + // Decl `width` arg name is the second `width` in the source. + let decl_pos = nth(src, "width", 0); + assert_eq!(target.start, decl_pos); + } + + #[test] + fn call_to_namespaced_proc_resolves_to_inner_decl() { + // `project::set_target_language` at the call site should + // resolve to `proc set_target_language` declared inside the + // matching `namespace eval project { ... }` block. + let src = "\ +namespace eval project { + proc set_target_language { + proj + language + } { } +} +project::set_target_language -proj p -language VHDL +"; + let parsed = parse(src); + let pos = first(src, "project::set_target_language"); + let target = definition_at(&parsed.document, src, pos).unwrap(); + // The decl's name span covers just `set_target_language` + // (without the namespace prefix), which appears as the + // first occurrence of that bare token in the source. 
+ let decl_pos = first(src, "set_target_language"); + assert_eq!(target.start, decl_pos); + } + + #[test] + fn call_inside_proc_body_to_proc_decl() { + // Mirrors interface.htcl: a call to a top-level proc from + // inside another proc's body. + let src = "\ +proc if_tport {\n type\n name\n} { }\n\ +proc axis {\n width\n} {\n if_tport\n}\n"; + let parsed = parse(src); + // Cursor on the `if_tport` call inside `axis`'s body — the + // second occurrence of `if_tport`. + let pos = nth(src, "if_tport", 1); + let target = definition_at(&parsed.document, src, pos).unwrap(); + // Resolves to the `if_tport` name in the declaration (first + // occurrence). + assert_eq!(target.start, first(src, "if_tport")); + } + + #[test] + fn var_ref_to_set_in_same_body() { + // Mirrors interface.htcl: `$mode` resolves to `set mode ...`. + let src = "\ +proc axis_if {\n kind\n} {\n\ + set mode hello\n\ + use_it $mode\n}\n"; + let parsed = parse(src); + let pos = first(src, "$mode") + 1; // on the `m` of `$mode` + let target = definition_at(&parsed.document, src, pos).unwrap(); + // Should point at the `mode` in `set mode`. + assert_eq!(target.start, first(src, "mode")); + } + + #[test] + fn var_ref_to_proc_parameter() { + // `$name` has no `set`, so it resolves to the proc parameter. + let src = "\ +proc axis_if {\n kind\n name\n} {\n\ + use_it $name\n}\n"; + let parsed = parse(src); + let pos = first(src, "$name") + 1; + let target = definition_at(&parsed.document, src, pos).unwrap(); + // Parameter `name` decl is the first occurrence of `name`. 
+ assert_eq!(target.start, first(src, "name")); + } + + #[test] + fn var_ref_to_variable_declaration() { + let src = "\ +proc p {} {\n\ + variable vlnv\n\ + use_it $vlnv\n}\n"; + let parsed = parse(src); + let pos = first(src, "$vlnv") + 1; + let target = definition_at(&parsed.document, src, pos).unwrap(); + assert_eq!(target.start, first(src, "vlnv")); + } + + #[test] + fn unknown_var_ref_returns_none() { + let src = "proc p {} {\n use_it $nope\n}\n"; + let parsed = parse(src); + let pos = first(src, "$nope") + 1; + assert!(definition_at(&parsed.document, src, pos).is_none()); + } + + #[test] + fn var_ref_inside_opaque_condition_resolves_to_param() { + // `$kind` lives inside an `if` condition that sits inside a + // command substitution — both opaque to the structured tree. + // The source-scan fallback still resolves it to the parameter. + let src = "\ +proc axis_if {\n kind\n} {\n\ + set mode [\n\ + if {$kind == controller} { Master }\n\ + ]\n}\n"; + let parsed = parse(src); + let pos = nth(src, "$kind", 0) + 1; + let target = definition_at(&parsed.document, src, pos).unwrap(); + assert_eq!(target.start, first(src, "kind")); + } + + #[test] + fn call_inside_command_substitution_resolves_to_decl() { + let src = "\ +proc create_cpm5 {\n name\n} { puts hi }\n\ +set cell [create_cpm5 -name x]\n"; + let parsed = parse(src); + // The second occurrence of `create_cpm5` (the call inside `[…]`). 
+ let pos = nth(src, "create_cpm5", 1); + let target = definition_at(&parsed.document, src, pos).unwrap(); + assert_eq!(target.start, first(src, "create_cpm5")); + } + + #[test] + fn call_flag_to_unknown_arg_returns_none() { + let src = "\ +proc show {\n width\n} { }\n\ +show -widthz 16\n"; + let parsed = parse(src); + let pos = first(src, "-widthz"); + assert!(definition_at(&parsed.document, src, pos).is_none()); + } +} diff --git a/vw-htcl/src/hover.rs b/vw-htcl/src/hover.rs new file mode 100644 index 0000000..e251647 --- /dev/null +++ b/vw-htcl/src/hover.rs @@ -0,0 +1,412 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Find the htcl construct at a given byte offset. +//! +//! Used by `vw analyzer` for `textDocument/hover` and (later) by the +//! REPL for inline hover-style popups. Pure analysis — returns a +//! structured [`HoverTarget`] referencing into the document; the +//! caller formats it (markdown for LSP, a ratatui widget for the +//! REPL, etc). + +use crate::ast::{ + Command, CommandKind, Document, Proc, ProcArg, ProcSignature, Stmt, Word, +}; +use crate::lower::{signature_table, SignatureTable}; +use crate::scope::{innermost_scope, resolve_var_def, scan_var_ref, VarDef}; +use crate::span::Span; + +/// A construct the cursor is on, plus the data needed to render +/// hover content. Lifetime-tied to the [`Document`] passed into +/// [`hover_at`]. +#[derive(Clone, Debug)] +pub enum HoverTarget<'a> { + /// Cursor is on the name of a `proc` declaration. The proc's own + /// signature contains the docs. + ProcDef { proc: &'a Proc, span: Span }, + /// Cursor is on the name of an argument inside a `proc` + /// declaration's args braces. 
+ ProcArgDef { + proc_name: String, + arg: &'a ProcArg, + span: Span, + }, + /// Cursor is on the first word of a command that resolves to a + /// known structured proc — i.e. a call to a documented proc. + CallSite { + proc_name: String, + signature: &'a ProcSignature, + span: Span, + }, + /// Cursor is on a `-flag` word in a call to a known proc. + CallArg { + proc_name: String, + arg: &'a ProcArg, + span: Span, + }, + /// Cursor is on a `$var` reference that resolves to a local + /// (`set`/`variable`) rather than a parameter. The span is the + /// reference itself. + LocalVar { name: String, span: Span }, + /// Cursor is on the name of an `enum` declaration. Shows the + /// variants block as a hover popup. + EnumDef { + decl: &'a crate::ast::EnumDecl, + span: Span, + }, +} + +impl HoverTarget<'_> { + pub fn span(&self) -> Span { + match self { + HoverTarget::ProcDef { span, .. } + | HoverTarget::ProcArgDef { span, .. } + | HoverTarget::CallSite { span, .. } + | HoverTarget::CallArg { span, .. } + | HoverTarget::LocalVar { span, .. } + | HoverTarget::EnumDef { span, .. } => *span, + } + } +} + +pub fn hover_at<'a>( + document: &'a Document, + source: &str, + offset: u32, +) -> Option> { + let table = signature_table(document); + hover_in_stmts(&document.stmts, &table, offset) + // Fallback: a `$var` reference — including one buried in opaque + // text (a command substitution or `if`/`while` condition). + .or_else(|| hover_scanned_var(document, source, offset)) +} + +/// Hover for a `$var` reference found by scanning the source. Resolves +/// to a parameter (rendered like an arg) or a local (`set`/`variable`). +fn hover_scanned_var<'a>( + document: &'a Document, + source: &str, + offset: u32, +) -> Option> { + let (name, span) = scan_var_ref(source, offset)?; + let (stmts, enclosing) = innermost_scope(document, offset); + match resolve_var_def(&name, stmts, enclosing, offset)? 
{ + VarDef::Param(arg) => Some(HoverTarget::ProcArgDef { + proc_name: enclosing + .and_then(|p| p.name.clone()) + .unwrap_or_default(), + arg, + // Anchor the hover on the reference, not the declaration. + span, + }), + VarDef::Local(_) => Some(HoverTarget::LocalVar { name, span }), + } +} + +/// Find the hover target at `offset` within `stmts`, descending into +/// proc bodies. The signature table is the document-wide (top-level) +/// one, so a call inside a body still resolves to the proc it names. +fn hover_in_stmts<'a>( + stmts: &'a [Stmt], + table: &SignatureTable<'a>, + offset: u32, +) -> Option> { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + if !cmd.span.contains(offset) { + continue; + } + if let Some(target) = hover_in_command(cmd, table, offset) { + return Some(target); + } + } + None +} + +fn hover_in_command<'a>( + cmd: &'a Command, + table: &SignatureTable<'a>, + offset: u32, +) -> Option> { + let primary = match &cmd.kind { + CommandKind::Proc(proc) => hover_in_proc_decl(proc, offset) + // Cursor isn't on the proc's name or an arg — look inside + // the body. + .or_else(|| hover_in_stmts(&proc.body, table, offset)), + CommandKind::EnumDecl(decl) => { + // Cursor on the enum's name → show the variants. + if decl.name_span.contains(offset) { + Some(HoverTarget::EnumDef { + decl, + span: decl.name_span, + }) + } else { + None + } + } + _ => hover_in_call(cmd, table, offset), + }; + primary.or_else(|| hover_in_cmd_substs(&cmd.words, table, offset)) +} + +/// Descend into any `[ … ]` command substitutions on this command's +/// words so hover works on calls written inline, e.g. +/// `set cell [create_cpm5 -name x]`. +fn hover_in_cmd_substs<'a>( + words: &'a [Word], + table: &SignatureTable<'a>, + offset: u32, +) -> Option> { + for word in words { + if !word.span.contains(offset) { + continue; + } + for part in &word.parts { + if let crate::ast::WordPart::CmdSubst { span, body, .. 
} = part { + if span.contains(offset) { + return hover_in_stmts(body, table, offset); + } + } + } + } + None +} + +fn hover_in_proc_decl<'a>( + proc: &'a Proc, + offset: u32, +) -> Option> { + if proc.name_span.contains(offset) { + return Some(HoverTarget::ProcDef { + proc, + span: proc.name_span, + }); + } + if let Some(sig) = proc.signature.as_ref() { + for arg in &sig.args { + if arg.name_span.contains(offset) { + let proc_name = proc.name.clone().unwrap_or_default(); + return Some(HoverTarget::ProcArgDef { + proc_name, + arg, + span: arg.name_span, + }); + } + } + } + None +} + +fn hover_in_call<'a>( + cmd: &'a Command, + table: &SignatureTable<'a>, + offset: u32, +) -> Option> { + let first = cmd.words.first()?; + let name = first.as_text()?; + let sig = *table.get(name)?; + + if first.span.contains(offset) { + return Some(HoverTarget::CallSite { + proc_name: name.to_string(), + signature: sig, + span: first.span, + }); + } + + // Walk remaining words looking for the `-flag` under the cursor. + // Value words (the token after a flag) don't trigger hover — + // they could be anything from a literal to a [cmd subst], and + // there's no general definition to point at. + for word in cmd.words.iter().skip(1) { + if !word.span.contains(offset) { + continue; + } + let text = word.as_text()?; + let flag = text.strip_prefix('-')?; + let arg = sig.find(flag)?; + return Some(HoverTarget::CallArg { + proc_name: name.to_string(), + arg, + span: word.span, + }); + } + None +} + +// Helpers retained for symmetric use from formatters that want to +// pretty-print attributes etc. without re-walking from raw AST. +#[allow(dead_code)] +fn _word_text(word: &Word) -> Option<&str> { + word.as_text() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + fn at(src: &str, needle: &str, occurrence: usize) -> u32 { + let mut start = 0; + for i in 0..=occurrence { + let pos = src[start..] 
+ .find(needle) + .map(|p| start + p) + .expect("needle not found"); + if i == occurrence { + return pos as u32; + } + start = pos + needle.len(); + } + unreachable!() + } + + fn first(src: &str, needle: &str) -> u32 { + at(src, needle, 0) + } + + #[test] + fn hover_on_call_name() { + let src = "\ +proc greet {\n @default(\"world\") name\n} { puts $name }\n\ +greet -name there\n"; + let parsed = parse(src); + let target = + hover_at(&parsed.document, src, first(src, "greet -")).unwrap(); + match target { + HoverTarget::CallSite { proc_name, .. } => { + assert_eq!(proc_name, "greet"); + } + other => panic!("expected CallSite, got {other:?}"), + } + } + + #[test] + fn hover_on_call_arg_flag() { + let src = "\ +proc greet {\n @default(\"world\") name\n} { puts $name }\n\ +greet -name there\n"; + let parsed = parse(src); + let pos = first(src, "-name there"); + let target = hover_at(&parsed.document, src, pos).unwrap(); + match target { + HoverTarget::CallArg { arg, proc_name, .. } => { + assert_eq!(proc_name, "greet"); + assert_eq!(arg.name, "name"); + } + other => panic!("expected CallArg, got {other:?}"), + } + } + + #[test] + fn hover_on_value_word_returns_none() { + let src = "\ +proc greet {\n @default(\"world\") name\n} { puts $name }\n\ +greet -name there\n"; + let parsed = parse(src); + let pos = first(src, "there"); + assert!(hover_at(&parsed.document, src, pos).is_none()); + } + + #[test] + fn hover_on_proc_decl_name() { + let src = "proc greet {\n name\n} { puts $name }\n"; + let parsed = parse(src); + let pos = first(src, "greet"); + let target = hover_at(&parsed.document, src, pos).unwrap(); + assert!(matches!(target, HoverTarget::ProcDef { .. 
})); + } + + #[test] + fn hover_on_proc_arg_decl() { + let src = "proc greet {\n @default(\"x\") name\n} { puts hi }\n"; + let parsed = parse(src); + let pos = first(src, "name"); // first "name" is in args + let target = hover_at(&parsed.document, src, pos).unwrap(); + match target { + HoverTarget::ProcArgDef { arg, .. } => { + assert_eq!(arg.name, "name"); + assert_eq!(arg.attributes[0].name, "default"); + } + other => panic!("expected ProcArgDef, got {other:?}"), + } + } + + #[test] + fn hover_on_call_inside_proc_body() { + // A call to a documented proc from within another proc's body + // should hover, just like a top-level call. + let src = "\ +proc if_tport {\n type\n name\n} { }\n\ +proc axis {\n width\n} {\n if_tport\n}\n"; + let parsed = parse(src); + let pos = at(src, "if_tport", 1); + let target = hover_at(&parsed.document, src, pos).unwrap(); + match target { + HoverTarget::CallSite { proc_name, .. } => { + assert_eq!(proc_name, "if_tport"); + } + other => panic!("expected CallSite, got {other:?}"), + } + } + + #[test] + fn hover_on_call_inside_command_substitution() { + // The interior of `[ … ]` is now parsed; hover on the inner + // call's name should report the proc the same way it does at + // the top level. + let src = "\ +proc create_cpm5 {\n @default(0) name\n} { puts hi }\n\ +set cell [create_cpm5 -name x]\n"; + let parsed = parse(src); + let pos = at(src, "create_cpm5", 1); // the call inside [ ] + let target = hover_at(&parsed.document, src, pos).unwrap(); + match target { + HoverTarget::CallSite { proc_name, .. 
} => { + assert_eq!(proc_name, "create_cpm5"); + } + other => panic!("expected CallSite, got {other:?}"), + } + } + + #[test] + fn hover_on_unknown_call_returns_none() { + let src = "puts hello\n"; + let parsed = parse(src); + let pos = first(src, "puts"); + assert!(hover_at(&parsed.document, src, pos).is_none()); + } + + #[test] + fn hover_on_var_in_condition_shows_param() { + // `$kind` inside an opaque `if` condition resolves, via the + // source scan, to the proc parameter — rendered like an arg. + let src = "\ +proc axis_if {\n @enum(target, controller) kind\n} {\n\ + set m [ if {$kind == controller} { a } ]\n}\n"; + let parsed = parse(src); + let pos = first(src, "$kind") + 1; + let target = hover_at(&parsed.document, src, pos).unwrap(); + match target { + HoverTarget::ProcArgDef { arg, .. } => { + assert_eq!(arg.name, "kind"); + assert_eq!(arg.attributes[0].name, "enum"); + } + other => panic!("expected ProcArgDef, got {other:?}"), + } + } + + #[test] + fn hover_on_local_var_reports_local() { + let src = "\ +proc p {} {\n set count 0\n use $count\n}\n"; + let parsed = parse(src); + let pos = first(src, "$count") + 1; + let target = hover_at(&parsed.document, src, pos).unwrap(); + match target { + HoverTarget::LocalVar { name, .. } => assert_eq!(name, "count"), + other => panic!("expected LocalVar, got {other:?}"), + } + } +} diff --git a/vw-htcl/src/lib.rs b/vw-htcl/src/lib.rs new file mode 100644 index 0000000..f1b9d4f --- /dev/null +++ b/vw-htcl/src/lib.rs @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! htcl language layer. +//! +//! Provides the parser, concrete syntax tree, and analysis passes that +//! every htcl-consuming subcommand of `vw` shares. The same code drives +//! `vw run`, `vw check`, the LSP (`vw analyzer`), and (eventually) the +//! REPL (`vw repl`). 
Keeping a single source of truth for parsing and +//! analysis is the durable fix for the "compiler vs. IDE drift" failure +//! mode of language tooling. +//! +//! This v0 covers the Phase 0 subset from the project plan: literals, +//! variables, command substitution, `set`, `proc` (vanilla form), +//! generic command invocations, and comments. Control flow, structured +//! proc grammar, modules, and dependency-aware imports come in later +//! phases. + +// `quote_tcl!` (and `quote_htcl!`) generate code that names this +// crate as `::vw_htcl::…`. Within the crate itself that path +// doesn't resolve by default; this directive aliases the current +// crate as `vw_htcl` so the macros work uniformly inside and +// outside of vw-htcl. Standard Rust idiom for self-targeting +// proc-macros. +extern crate self as vw_htcl; + +pub mod ast; +pub mod cmdline; +pub mod complete; +pub mod doc; +pub mod emit; +pub mod enum_parse; +pub mod goto; +pub mod hover; +pub mod line_index; +pub mod loader; +pub mod lower; +pub mod overload; +pub mod parser; +pub mod proc_args; +pub mod repr; +pub mod scope; +pub mod signature_help; +pub mod span; +pub mod src_path; +pub mod type_parse; +pub mod validate; + +pub use complete::{complete_at, Completion, CompletionKind}; +pub use goto::definition_at; +pub use hover::{hover_at, HoverTarget}; +pub use loader::{ + load as load_program, load_with_observer as load_program_with_observer, + ImportEdge, LoadError, LoadObserver, LoadedFile, LoadedProgram, + SourceRegion, +}; +pub use lower::{ + extern_rename_prelude, is_extern_call, lower_command, + lower_proc_decl_with_name, rewrite_externs, signature_table, ExternRewrite, + SignatureTable, EXTERN_PREFIX, +}; +pub use overload::emit_dispatcher; +pub use repr::{ + emit_enum_prelude, emit_primitive_prelude, emit_repr, emit_repr_with_types, +}; +pub use signature_help::{signature_help_at, SignatureHelp}; +pub use src_path::{ + classify as classify_src_path, PathKind, ResolveError, Resolver, +}; +pub 
use validate::{ + build_enum_decl_table, build_signature_table_with_overloads, + build_type_decl_table, mangle_specialization, validate, + validate_with_all_extras, validate_with_extras, validate_with_signatures, + Diagnostic as ValidatorDiagnostic, OverloadTable, Severity, +}; + +pub use ast::{ + Attribute, AttributeValue, Command, CommandKind, Document, EnumDecl, + EnumVariant, OverloadInfo, OverloadVariant, Proc, ProcArg, ProcSignature, + SrcImport, Stmt, TypeDecl, TypeExpr, Word, WordPart, +}; +pub use line_index::{LineCol, LineIndex}; +pub use parser::{parse, ParseError, ParseOutput}; +pub use span::Span; diff --git a/vw-htcl/src/line_index.rs b/vw-htcl/src/line_index.rs new file mode 100644 index 0000000..4375eca --- /dev/null +++ b/vw-htcl/src/line_index.rs @@ -0,0 +1,163 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Byte-offset ↔ line/column conversion. +//! +//! LSP positions are 0-indexed `(line, character)` where `character` +//! counts UTF-16 code units, not bytes. We honor that here so the +//! editor's cursor lands where the user expects on non-ASCII source. + +use crate::span::Span; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LineCol { + pub line: u32, + pub character: u32, +} + +#[derive(Clone, Debug)] +pub struct LineIndex { + /// Byte offset of the start of each line. `line_starts[0] == 0`. + line_starts: Vec, + /// Full source text; needed for the UTF-8 → UTF-16 column + /// conversion. 
+ text: String, +} + +impl LineIndex { + pub fn new(text: &str) -> Self { + let mut line_starts = vec![0u32]; + for (i, b) in text.bytes().enumerate() { + if b == b'\n' { + line_starts.push((i + 1) as u32); + } + } + Self { + line_starts, + text: text.to_string(), + } + } + + pub fn position(&self, byte_offset: u32) -> LineCol { + let offset = byte_offset.min(self.text.len() as u32); + let line_idx = match self.line_starts.binary_search(&offset) { + Ok(i) => i, + Err(i) => i - 1, + }; + let line_start = self.line_starts[line_idx]; + let line_text = &self.text[line_start as usize..offset as usize]; + let character = line_text.encode_utf16().count() as u32; + LineCol { + line: line_idx as u32, + character, + } + } + + pub fn range(&self, span: Span) -> (LineCol, LineCol) { + (self.position(span.start), self.position(span.end)) + } + + /// Convert a UTF-16 line/character position back to a byte + /// offset. Inverse of [`position`](Self::position); used to map + /// LSP positions from clients (which speak UTF-16) into byte + /// offsets the rest of the analysis uses. + /// + /// Clamps gracefully: a line past EOF returns the source length; + /// a character past EOL returns the offset of the line ending. 
+ pub fn offset_of(&self, lc: LineCol) -> u32 { + let Some(&line_start) = self.line_starts.get(lc.line as usize) else { + return self.text.len() as u32; + }; + let line_end = self + .line_starts + .get(lc.line as usize + 1) + .copied() + .map(|n| n.saturating_sub(1)) + .unwrap_or(self.text.len() as u32); + let line_text = &self.text[line_start as usize..line_end as usize]; + let mut byte_offset = line_start; + let mut char_count: u32 = 0; + for ch in line_text.chars() { + if char_count >= lc.character { + break; + } + char_count += ch.len_utf16() as u32; + byte_offset += ch.len_utf8() as u32; + } + byte_offset + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn basic_positions() { + let idx = LineIndex::new("abc\nde\nf"); + assert_eq!( + idx.position(0), + LineCol { + line: 0, + character: 0 + } + ); + assert_eq!( + idx.position(3), + LineCol { + line: 0, + character: 3 + } + ); + assert_eq!( + idx.position(4), + LineCol { + line: 1, + character: 0 + } + ); + assert_eq!( + idx.position(7), + LineCol { + line: 2, + character: 0 + } + ); + } + + #[test] + fn utf16_character_count() { + // `é` is one UTF-16 code unit, two UTF-8 bytes. + let idx = LineIndex::new("é\nx"); + let pos = idx.position(2); // byte after `é` + assert_eq!( + pos, + LineCol { + line: 0, + character: 1 + } + ); + } + + #[test] + fn offset_of_round_trips() { + let idx = LineIndex::new("abc\nde\nf"); + for &b in &[0u32, 1, 3, 4, 6, 7] { + assert_eq!(idx.offset_of(idx.position(b)), b); + } + } + + #[test] + fn offset_of_clamps_past_line_end() { + let idx = LineIndex::new("abc\nde"); + // line 0, character 100 → end of line 0 (byte 3) + assert_eq!( + idx.offset_of(LineCol { + line: 0, + character: 100 + }), + 3 + ); + } +} diff --git a/vw-htcl/src/loader.rs b/vw-htcl/src/loader.rs new file mode 100644 index 0000000..05ad667 --- /dev/null +++ b/vw-htcl/src/loader.rs @@ -0,0 +1,575 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Recursive `src` import resolution. +//! +//! Reads an entry-point .htcl file, parses it, resolves every `src` +//! statement via [`crate::src_path::Resolver`], and recursively pulls +//! in each imported module's contents. Idempotent on canonical +//! (realpath'd) file paths — a file imported by N callers loads +//! exactly once. +//! +//! The output is a single flat [`LoadedProgram`] carrying: +//! +//! - the concatenated source text (imports first, in topological +//! order, then the entry file's non-`src` content), which downstream +//! stages (lower, the analyzer, `vw run`) consume as if it were one +//! document; +//! - the set of canonical paths that were loaded, for cache +//! invalidation and tooling. + +use std::collections::HashSet; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; + +use crate::ast::{CommandKind, Stmt}; +use crate::parser::parse; +use crate::src_path::{ResolveError, Resolver}; + +#[derive(Debug, thiserror::Error)] +pub enum LoadError { + #[error("reading {path}: {source}")] + Io { + path: PathBuf, + #[source] + source: io::Error, + }, + #[error("resolving `src {raw}` from {importer}: {source}")] + Resolve { + importer: PathBuf, + raw: String, + #[source] + source: ResolveError, + }, + #[error( + "`src` import at {importer}:{line} has a non-literal path (it \ + contains `$var` or `[cmd]` substitution); module paths must \ + be a plain string" + )] + DynamicPath { importer: PathBuf, line: u32 }, + #[error("parse errors in {path}")] + Parse { + path: PathBuf, + errors: Vec, + }, +} + +/// Hooks called as the loader makes progress. Lets the CLI surface +/// real-time `Sourcing …` / `Checking …` lines without baking display +/// concerns into the loader. 
+/// +/// Events fire in dependency-first order, which matches Cargo's +/// "compile deps before the top crate" convention: for each `src` +/// import we hit, [`on_source`](Self::on_source) fires immediately, +/// the import is loaded (recursing through *its* dependencies first), +/// and only then does [`on_parsed`](Self::on_parsed) fire for that +/// file. The entry file's `on_parsed` fires last. +pub trait LoadObserver { + /// A `src ` statement is about to be resolved and loaded. + fn on_source(&mut self, _raw: &str) {} + /// `file` finished parsing. `raw` is the original `src` text when + /// this file was reached through an import (so callers can render + /// `amd-htcl/cpm5` rather than the full filesystem path); `None` + /// for the entry file. + fn on_parsed(&mut self, _file: &Path, _raw: Option<&str>) {} +} + +struct NoopObserver; +impl LoadObserver for NoopObserver {} + +#[derive(Debug, Default)] +pub struct LoadedProgram { + /// Flattened htcl source — every loaded file's non-`src` content, + /// concatenated. Downstream stages (lower, the analyzer in CLI + /// mode, `vw run`) consume this as if it were one document. + pub source: String, + /// Files seen, in the order [`load_file`] first visits them + /// (importer-first, depth-first). Each entry carries the file's + /// canonical path and its original on-disk text so callers can + /// map a span in [`source`](Self::source) back to a line/column + /// in the file it actually came from. + pub files: Vec, + /// Per-region map from byte ranges in [`source`](Self::source) + /// to `(file_index, file_offset)`. Regions are emitted in order + /// as content is concatenated, so the slice is sorted by + /// `flat_start` and non-overlapping — `locate` does a binary + /// search. + pub regions: Vec, +} + +#[derive(Debug, Clone)] +pub struct LoadedFile { + pub path: PathBuf, + pub source: String, + /// The `src` import that pulled this file in, or `None` for + /// the entry file the loader was started against. 
Captured at + /// load time so analyzers and the REPL can render call chains + /// like `failing.htcl:12 ← importer.htcl:4 (src @dep/foo) + /// ← entry.htcl:1 (src ip/cips)` — which is the htcl-level + /// equivalent of a stack trace. + pub imported_via: Option, +} + +/// An edge in the import graph: this file was loaded because the +/// file at index [`Self::importer_file`] executed a `src` statement +/// covering [`Self::src_span`] in the importer's source. +#[derive(Debug, Clone, Copy)] +pub struct ImportEdge { + pub importer_file: usize, + /// Span of the `src` statement in the **importer's file-local + /// source** (i.e. an offset into the importer's + /// [`LoadedFile::source`], *not* into the flattened + /// [`LoadedProgram::source`]). + pub src_span: crate::span::Span, +} + +#[derive(Debug, Clone, Copy)] +pub struct SourceRegion { + /// Inclusive byte start in the flattened source. + pub flat_start: u32, + /// Exclusive byte end in the flattened source. + pub flat_end: u32, + /// Index into [`LoadedProgram::files`]. + pub file_index: u32, + /// Byte offset of the start of this region in the originating + /// file's source. + pub file_offset: u32, +} + +impl LoadedProgram { + /// Map a byte offset in [`source`](Self::source) back to its + /// originating file's index and the byte offset within that file. + pub fn locate(&self, offset: u32) -> Option<(usize, u32)> { + // `regions` is sorted by `flat_start`; find the last region + // whose start is at or before `offset` and verify the offset + // falls inside it. + let idx = self.regions.partition_point(|r| r.flat_start <= offset); + if idx == 0 { + return None; + } + let region = &self.regions[idx - 1]; + if offset >= region.flat_end { + return None; + } + Some(( + region.file_index as usize, + region.file_offset + (offset - region.flat_start), + )) + } + + /// Map a span in the flattened source to `(file_index, + /// file_local_span)`. 
Assumes the span lies within a single + /// originating file's contribution — true for diagnostics emitted + /// against a single word/command, which is the use case we care + /// about. + pub fn locate_span( + &self, + span: crate::span::Span, + ) -> Option<(usize, crate::span::Span)> { + let (file_index, file_start) = self.locate(span.start)?; + let length = span.end.saturating_sub(span.start); + Some(( + file_index, + crate::span::Span::new(file_start, file_start + length), + )) + } + + /// Walk the import chain from `file_index` toward the entry, + /// yielding each [`ImportEdge`] in order (nearest first). The + /// entry file has no edge and so produces no items. + pub fn ancestry( + &self, + file_index: usize, + ) -> impl Iterator + '_ { + let mut cur = self.files.get(file_index).and_then(|f| f.imported_via); + std::iter::from_fn(move || { + let edge = cur?; + cur = self + .files + .get(edge.importer_file) + .and_then(|f| f.imported_via); + Some(edge) + }) + } +} + +/// Read `entry` and recursively resolve its imports. Each file is +/// loaded at most once; circular imports (a → b → a) short-circuit on +/// the second visit. +pub fn load( + entry: &Path, + resolver: &Resolver, +) -> Result { + let mut noop = NoopObserver; + load_with_observer(entry, resolver, &mut noop) +} + +/// Like [`load`], but reports progress through `observer` so the CLI +/// can print `Sourcing …` and `Checking …` lines. 
+pub fn load_with_observer(
+    entry: &Path,
+    resolver: &Resolver,
+    observer: &mut dyn LoadObserver,
+) -> Result<LoadedProgram, LoadError> {
+    // Prefer the canonical path; fall back to the path as given when
+    // canonicalization fails.
+    let entry = entry.canonicalize().unwrap_or_else(|_| entry.to_path_buf());
+    let mut state = State {
+        program: LoadedProgram::default(),
+        loaded: HashSet::new(),
+        in_progress: HashSet::new(),
+        resolver,
+        observer,
+    };
+    state.load_file(&entry, None, None)?;
+    Ok(state.program)
+}
+
+/// Bookkeeping shared by every recursive `load_file` call made on
+/// behalf of one `load_with_observer` invocation.
+struct State<'r, 'o> {
+    /// Flat source, file list, and regions accumulated so far.
+    program: LoadedProgram,
+    /// Files whose load has completed.
+    loaded: HashSet<PathBuf>,
+    /// Files whose load has started but not finished. An import that
+    /// points back at one of these is a cycle and is skipped.
+    in_progress: HashSet<PathBuf>,
+    resolver: &'r Resolver,
+    observer: &'o mut dyn LoadObserver,
+}
+
+impl State<'_, '_> {
+    /// True when `path` has already been loaded or is currently being
+    /// loaded further up the recursion.
+    fn seen(&self, path: &Path) -> bool {
+        self.loaded.contains(path) || self.in_progress.contains(path)
+    }
+
+    /// Load `path` and, depth-first, every file it imports via `src`,
+    /// appending their text to the flat source.
+    ///
+    /// Returns `Ok(())` without doing anything when `path` was already
+    /// loaded or is in progress (diamond imports and cycles).
+    ///
+    /// `reached_via` is the raw import string that led here (`None`
+    /// for the entry file) and is forwarded to the observer's
+    /// `on_parsed`. `imported_via` is stored on the `LoadedFile`.
+    ///
+    /// # Errors
+    ///
+    /// - `LoadError::Io` when the file cannot be read.
+    /// - `LoadError::Parse` when the parser reports any error.
+    /// - `LoadError::DynamicPath` when a `src` statement has no
+    ///   statically known path.
+    /// - `LoadError::Resolve` when the resolver rejects an import.
+    fn load_file(
+        &mut self,
+        path: &Path,
+        reached_via: Option<&str>,
+        imported_via: Option<ImportEdge>,
+    ) -> Result<(), LoadError> {
+        if self.seen(path) {
+            return Ok(());
+        }
+        self.in_progress.insert(path.to_path_buf());
+
+        let source = fs::read_to_string(path).map_err(|e| LoadError::Io {
+            path: path.to_path_buf(),
+            source: e,
+        })?;
+        let parsed = parse(&source);
+        if !parsed.errors.is_empty() {
+            return Err(LoadError::Parse {
+                path: path.to_path_buf(),
+                errors: parsed.errors,
+            });
+        }
+
+        // Register the file up front so we have a stable index for
+        // every chunk we emit on its behalf.
+        let file_index = self.program.files.len() as u32;
+        self.program.files.push(LoadedFile {
+            path: path.to_path_buf(),
+            source: source.clone(),
+            imported_via,
+        });
+
+        // Walk the parsed document, copying text in span order. Any
+        // `src` statement triggers a recursion so the imported content
+        // lands in the flat source before we continue the importer's
+        // remaining text. Each pushed slice gets a `SourceRegion`
+        // entry so locations in the flat source can be mapped back.
+        let mut cursor = 0usize;
+        let parent_dir = path.parent().unwrap_or_else(|| Path::new("."));
+        for stmt in &parsed.document.stmts {
+            let Stmt::Command(cmd) = stmt else { continue };
+            let CommandKind::Src(import) = &cmd.kind else {
+                continue;
+            };
+            self.emit_chunk(
+                &source,
+                cursor,
+                cmd.span.start as usize,
+                file_index,
+            );
+            cursor = cmd.span.end as usize;
+            // Skip the trailing newline that terminated the `src`
+            // command so we don't leave a stray blank line behind.
+            if source.as_bytes().get(cursor) == Some(&b'\n') {
+                cursor += 1;
+            }
+
+            let Some(raw) = import.path.as_deref() else {
+                // `line_of` is zero-based; report a one-based line.
+                let line = line_of(&source, cmd.span.start) + 1;
+                return Err(LoadError::DynamicPath {
+                    importer: path.to_path_buf(),
+                    line,
+                });
+            };
+            let resolved =
+                self.resolver.resolve(parent_dir, raw).map_err(|source| {
+                    LoadError::Resolve {
+                        importer: path.to_path_buf(),
+                        raw: raw.to_string(),
+                        source,
+                    }
+                })?;
+            // Only announce imports that will actually be loaded; the
+            // recursive call below is a no-op for the rest.
+            if !self.seen(&resolved) {
+                self.observer.on_source(raw);
+            }
+            self.load_file(
+                &resolved,
+                Some(raw),
+                Some(ImportEdge {
+                    importer_file: file_index as usize,
+                    src_span: cmd.span,
+                }),
+            )?;
+        }
+        // Tail after the last `src`.
+        self.emit_chunk(&source, cursor, source.len(), file_index);
+        if !self.program.source.ends_with('\n') {
+            // Synthetic newline so subsequent files don't run on; no
+            // region for it — it didn't come from any input file.
+            self.program.source.push('\n');
+        }
+
+        self.in_progress.remove(path);
+        self.loaded.insert(path.to_path_buf());
+        self.observer.on_parsed(path, reached_via);
+        Ok(())
+    }
+
+    /// Push `source[start..end]` onto the flat source and record a
+    /// region mapping that byte range back to the file it came from.
+    /// Empty or inverted ranges (`start >= end`) are ignored.
+    ///
+    /// NOTE: offsets are stored as `u32`, so the `as u32` casts below
+    /// assume both the flat source and each file stay under 4 GiB.
+    fn emit_chunk(
+        &mut self,
+        source: &str,
+        start: usize,
+        end: usize,
+        file_index: u32,
+    ) {
+        if start >= end {
+            return;
+        }
+        let flat_start = self.program.source.len() as u32;
+        self.program.source.push_str(&source[start..end]);
+        let flat_end = self.program.source.len() as u32;
+        self.program.regions.push(SourceRegion {
+            flat_start,
+            flat_end,
+            file_index,
+            file_offset: start as u32,
+        });
+    }
+}
+
+/// Zero-based line number of byte offset `byte` in `source`: the
+/// number of `\n` bytes that precede it. `byte` is clamped to the
+/// end of `source`.
+///
+/// Counts over the raw bytes rather than slicing the `str`, so an
+/// offset that is not on a char boundary cannot panic.
+fn line_of(source: &str, byte: u32) -> u32 {
+    let end = (byte as usize).min(source.len());
+    source.as_bytes()[..end]
+        .iter()
+        .filter(|b| **b == b'\n')
+        .count() as u32
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+
+    fn workspace() -> tempfile::TempDir {
+        tempfile::tempdir().unwrap()
+    }
+
+    #[test]
+    fn loads_a_single_file_unchanged() {
+        let dir = workspace();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "puts hi\n").unwrap();
+        let prog = load(&entry, &Resolver::new()).unwrap();
+        assert_eq!(prog.source.trim(), "puts hi");
+        assert_eq!(prog.files.len(), 1);
+    }
+
+    #[test]
+    fn imports_local_file_and_drops_src_statement() {
+        let dir = workspace();
+        fs::write(dir.path().join("lib.htcl"), "proc f {} { puts hi }\n")
+            .unwrap();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src lib\nf\n").unwrap();
+
+        let prog = load(&entry, &Resolver::new()).unwrap();
+        // Imported content first, no `src` statement, then the importer.
+        assert_eq!(
+            prog.source, "proc f {} { puts hi }\nf\n",
+            "actual: {:?}",
+            prog.source
+        );
+        assert_eq!(prog.files.len(), 2);
+    }
+
+    #[test]
+    fn idempotent_across_diamond_imports() {
+        // main → a, b ; a → c ; b → c — c must load exactly once.
+        let dir = workspace();
+        fs::write(dir.path().join("c.htcl"), "proc c {} {}\n").unwrap();
+        fs::write(dir.path().join("a.htcl"), "src c\nproc a {} {}\n").unwrap();
+        fs::write(dir.path().join("b.htcl"), "src c\nproc b {} {}\n").unwrap();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src a\nsrc b\n").unwrap();
+
+        let prog = load(&entry, &Resolver::new()).unwrap();
+        let occurrences = prog.source.matches("proc c {}").count();
+        assert_eq!(occurrences, 1, "c loaded multiple times: {}", prog.source);
+    }
+
+    #[test]
+    fn cycle_does_not_loop_forever() {
+        let dir = workspace();
+        fs::write(dir.path().join("a.htcl"), "src b\nproc a {} {}\n").unwrap();
+        fs::write(dir.path().join("b.htcl"), "src a\nproc b {} {}\n").unwrap();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src a\n").unwrap();
+        let prog = load(&entry, &Resolver::new()).unwrap();
+        assert!(prog.source.contains("proc a"));
+        assert!(prog.source.contains("proc b"));
+    }
+
+    #[test]
+    fn named_dependency_resolves_through_the_cache() {
+        let dir = workspace();
+        let dep_root = dir.path().join("cache").join("xilinx-ip-deadbeef");
+        fs::create_dir_all(&dep_root).unwrap();
+        fs::write(dep_root.join("cpm5.htcl"), "proc create_cpm5 {} {}\n")
+            .unwrap();
+        let resolver = Resolver::new().with_dep("xilinx-ip", dep_root);
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src @xilinx-ip/cpm5\ncreate_cpm5\n").unwrap();
+        let prog = load(&entry, &resolver).unwrap();
+        assert!(prog.source.contains("proc create_cpm5"));
+        assert!(prog.source.contains("\ncreate_cpm5\n"));
+    }
+
+    #[test]
+    fn observer_fires_in_dependency_order() {
+        // entry → a → c ; entry → b
+        // Expect: source a, parse a (after source c, parse c),
+        // source b, parse b, parse entry.
+        let dir = workspace();
+        fs::write(dir.path().join("c.htcl"), "proc c {} {}\n").unwrap();
+        fs::write(dir.path().join("a.htcl"), "src c\nproc a {} {}\n").unwrap();
+        fs::write(dir.path().join("b.htcl"), "proc b {} {}\n").unwrap();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src a\nsrc b\n").unwrap();
+
+        #[derive(Default)]
+        struct Recorder {
+            events: Vec<String>,
+        }
+        impl LoadObserver for Recorder {
+            fn on_source(&mut self, raw: &str) {
+                self.events.push(format!("source {raw}"));
+            }
+            fn on_parsed(&mut self, file: &Path, raw: Option<&str>) {
+                let label = match raw {
+                    Some(r) => r.to_string(),
+                    None => file
+                        .file_stem()
+                        .and_then(|s| s.to_str())
+                        .unwrap_or("?")
+                        .to_string(),
+                };
+                self.events.push(format!("parse {label}"));
+            }
+        }
+
+        let mut rec = Recorder::default();
+        load_with_observer(&entry, &Resolver::new(), &mut rec).unwrap();
+        assert_eq!(
+            rec.events,
+            vec![
+                "source a",
+                "source c",
+                "parse c",
+                "parse a",
+                "source b",
+                "parse b",
+                "parse main",
+            ]
+        );
+    }
+
+    #[test]
+    fn observer_suppresses_source_for_already_loaded_imports() {
+        // Diamond: main → a → c ; main → b → c. `c` is encountered
+        // twice via `src` but only loaded once, so "Sourcing c"
+        // should fire exactly once.
+        let dir = workspace();
+        fs::write(dir.path().join("c.htcl"), "proc c {} {}\n").unwrap();
+        fs::write(dir.path().join("a.htcl"), "src c\nproc a {} {}\n").unwrap();
+        fs::write(dir.path().join("b.htcl"), "src c\nproc b {} {}\n").unwrap();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src a\nsrc b\n").unwrap();
+
+        #[derive(Default)]
+        struct Counter {
+            source_c: usize,
+            parse_c: usize,
+        }
+        impl LoadObserver for Counter {
+            fn on_source(&mut self, raw: &str) {
+                if raw == "c" {
+                    self.source_c += 1;
+                }
+            }
+            fn on_parsed(&mut self, file: &Path, _raw: Option<&str>) {
+                if file.file_stem().and_then(|s| s.to_str()) == Some("c") {
+                    self.parse_c += 1;
+                }
+            }
+        }
+        let mut counter = Counter::default();
+        load_with_observer(&entry, &Resolver::new(), &mut counter).unwrap();
+        assert_eq!(counter.source_c, 1);
+        assert_eq!(counter.parse_c, 1);
+    }
+
+    #[test]
+    fn regions_map_each_byte_back_to_its_originating_file() {
+        // The entry file imports two local files and has its own
+        // `puts` lines after each import.
+        let dir = workspace();
+        fs::write(dir.path().join("a.htcl"), "proc a {} {}\n").unwrap();
+        fs::write(dir.path().join("b.htcl"), "proc b {} {}\n").unwrap();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src a\nputs hello\nsrc b\nputs done\n").unwrap();
+        let prog = load(&entry, &Resolver::new()).unwrap();
+
+        // The first byte of `puts hello` should map back to the
+        // entry file (main.htcl).
+        let puts_hello_at_flat =
+            prog.source.find("puts hello").expect("puts hello in flat") as u32;
+        let (idx, file_offset) =
+            prog.locate(puts_hello_at_flat).expect("locate puts hello");
+        assert_eq!(
+            prog.files[idx].path.file_name().and_then(|s| s.to_str()),
+            Some("main.htcl")
+        );
+        // In main.htcl the line `puts hello` sits right after
+        // `src a\n`, which is 6 bytes (s, r, c, space, a, \n), so
+        // `puts` starts at byte offset 6.
+        assert_eq!(file_offset, 6);
+
+        // The first byte of `proc a` should map to a.htcl.
+        let proc_a_at_flat =
+            prog.source.find("proc a").expect("proc a in flat") as u32;
+        let (idx_a, _) = prog.locate(proc_a_at_flat).expect("locate proc a");
+        assert_eq!(
+            prog.files[idx_a].path.file_name().and_then(|s| s.to_str()),
+            Some("a.htcl")
+        );
+    }
+
+    #[test]
+    fn unknown_dep_surfaces_helpful_error() {
+        let dir = workspace();
+        let entry = dir.path().join("main.htcl");
+        fs::write(&entry, "src @nope/cpm5\n").unwrap();
+        let err = load(&entry, &Resolver::new()).unwrap_err();
+        let msg = err.to_string();
+        assert!(msg.contains("unknown dependency"), "{msg}");
+    }
+}
diff --git a/vw-htcl/src/lower.rs b/vw-htcl/src/lower.rs
new file mode 100644
index 0000000..2afb688
--- /dev/null
+++ b/vw-htcl/src/lower.rs
@@ -0,0 +1,766 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! Lower htcl to plain Tcl for the EDA backend.
+//!
+//! Lowering rules:
+//!
+//! - Structured `proc` declarations emit `proc name {args} {
+//!   ::vw::kwargs $args {name default ...} body }`. The runtime
+//!   signature is variadic; the generated prelude parses the
+//!   caller's `-flag value` pairs into local variables and applies
+//!   `@default` values for omitted flags. Attributes never reach the
+//!   emitted text (Vivado's Tcl doesn't understand `@default` etc.).
+//! - Call sites ship in the keyword form the user wrote. There is no
+//!   compile-time keyword → positional rewrite.
+//! - `[ … ]` substitutions are lowered recursively, so a multi-line
+//!   bracket body collapses to a single Tcl statement. Parsed proc
+//!   bodies are re-emitted statement by statement through the same
+//!   path.
+//! - Type and enum declarations emit nothing; a `src` statement that
+//!   survived the loader is rendered as a Tcl comment.
+//! - Everything else (unknown commands, calls to commands without a
+//!   structured signature) is reconstructed word by word.
+
+use std::collections::HashMap;
+
+use crate::ast::{
+    Command, CommandKind, Document, NamespaceEval, Proc, ProcSignature, Stmt,
+    Word, WordForm, WordPart,
+};
+
+/// Maps a proc's (possibly namespace-qualified) name to its parsed
+/// signature, borrowed from the document it was collected from.
+pub type SignatureTable<'a> = HashMap<String, &'a ProcSignature>;
+
+/// Walk `doc` and collect every proc's signature — top-level and
+/// nested inside `namespace eval` blocks. Namespaced procs register
+/// under their qualified name (`<namespace>::<name>`), matching the
+/// signature table the validator builds so call-site lowering works
+/// uniformly for both shapes.
+pub fn signature_table(doc: &Document) -> SignatureTable<'_> {
+    let mut table = HashMap::new();
+    collect_into(&doc.stmts, "", &mut table);
+    table
+}
+
+/// Join `name` onto `prefix` with `::`, or return `name` alone when
+/// `prefix` is empty (top level).
+fn qualify(prefix: &str, name: &str) -> String {
+    if prefix.is_empty() {
+        name.to_string()
+    } else {
+        format!("{prefix}::{name}")
+    }
+}
+
+/// Recursive worker for [`signature_table`]: record every proc in
+/// `stmts` that has both a name and a parsed signature, qualifying
+/// names with `prefix`, and descend into named `namespace eval`
+/// blocks. Procs or namespaces without a name are skipped.
+fn collect_into<'a>(
+    stmts: &'a [Stmt],
+    prefix: &str,
+    table: &mut SignatureTable<'a>,
+) {
+    for stmt in stmts {
+        let Stmt::Command(cmd) = stmt else { continue };
+        match &cmd.kind {
+            CommandKind::Proc(proc) => {
+                let Some(name) = proc.name.as_deref() else {
+                    continue;
+                };
+                let Some(sig) = proc.signature.as_ref() else {
+                    continue;
+                };
+                table.insert(qualify(prefix, name), sig);
+            }
+            CommandKind::NamespaceEval(ns) => {
+                let Some(name) = ns.name.as_deref() else {
+                    continue;
+                };
+                collect_into(&ns.body, &qualify(prefix, name), table);
+            }
+            _ => {}
+        }
+    }
+}
+
+/// Lower one command into its Tcl equivalent for the EDA backend.
+///
+/// Called for top-level commands and, recursively, for commands
+/// nested in `namespace eval` blocks, proc bodies, and `[ … ]`
+/// substitutions. Returns the empty string for declarations that
+/// ship nothing (type and enum declarations).
+pub fn lower_command(
+    cmd: &Command,
+    source: &str,
+    table: &SignatureTable<'_>,
+) -> String {
+    match &cmd.kind {
+        CommandKind::Proc(proc) => lower_proc_decl(proc, source, table),
+        CommandKind::NamespaceEval(ns) => {
+            lower_namespace_eval(ns, source, table)
+        }
+        // `src` is a module import; by the time we lower we expect the
+        // [`crate::loader`] flatten pass to have already inlined every
+        // import's contents and dropped the `src` statements. Anything
+        // that slips through here we render as a no-op comment so the
+        // emitted Tcl is still well-formed.
+        CommandKind::Src(import) => {
+            let path = import.path.as_deref().unwrap_or("");
+            format!("# vw: unresolved `src {path}` — loader bypass")
+        }
+        // Newtype declarations are compile-time only — they feed the
+        // analyzer / printer machinery but ship nothing to Vivado.
+        // Drop entirely (empty Tcl, no whitespace).
+        CommandKind::TypeDecl(_) => String::new(),
+        // Enum declarations are also compile-time-only at this
+        // layer — the codegen path (vw-htcl/src/repr.rs) emits the
+        // auto-generated `namespace eval` block separately
+        // through the same wrap-with-repr pipeline used for the
+        // primitive prelude. The decl itself ships nothing.
+        CommandKind::EnumDecl(_) => String::new(),
+        _ => {
+            // Verbatim, but reconstructed word-by-word so that any
+            // `[ … ]` substitution inside the command gets its own
+            // commands lowered through the same pipeline (extern
+            // rewrites, multi-line bracket flattening).
+            //
+            // No keyword→positional rewrite here: htcl is keyword-
+            // only at the call site, and the rewrite from `-flag
+            // value` pairs to local variables happens at runtime
+            // in the wrapper's `::vw::kwargs $args { ... }` prelude
+            // (emitted by `lower_proc_decl`). That lets call sites
+            // anywhere — top-level, inside a proc body, inside a
+            // `[ ... ]`, inside an `eval` — work uniformly without
+            // the lowerer needing to see every call site.
+            lower_words(&cmd.words, source, table)
+        }
+    }
+}
+
+/// Lower a `namespace eval` block: recurse on each inner statement
+/// (so inner proc declarations get their htcl attributes stripped
+/// and gain the `::vw::kwargs` runtime prelude) and wrap the
+/// result in `namespace eval <name> { ... }`. Output is a single
+/// Tcl-valid string the EDA backend can `eval` directly.
+fn lower_namespace_eval(
+    ns: &NamespaceEval,
+    source: &str,
+    table: &SignatureTable<'_>,
+) -> String {
+    // A namespace without a parsed name lowers with an empty name.
+    let name = ns.name.as_deref().unwrap_or("");
+    let mut body = String::new();
+    for stmt in &ns.body {
+        let Stmt::Command(cmd) = stmt else { continue };
+        let line = lower_command(cmd, source, table);
+        // Declarations that lower to nothing contribute no line.
+        if !line.is_empty() {
+            body.push_str(&line);
+            body.push('\n');
+        }
+    }
+    format!("namespace eval {name} {{\n{body}}}")
+}
+
+/// Lower a `proc` declaration into a Tcl proc whose runtime
+/// signature is `args` (variadic). The first line of the body is a
+/// generated `::vw::kwargs $args { name default ... }` call that
+/// parses the caller's `-flag value` pairs into local variables
+/// matching the declared parameter names — defaults applied where
+/// the caller didn't supply a flag. After the prelude the lowered
+/// body runs, using `$name`, `$dir`, etc. just as if they were
+/// standard Tcl parameters.
+///
+/// Why this shape: htcl is keyword-only at the call site. Doing
+/// the parse at runtime (in the wrapper) means every call site
+/// works the same — top-level, inside a proc body, inside a
+/// `[ ... ]` substitution, inside an `eval`. The previous
+/// architecture rewrote `-flag value` → positional at compile
+/// time, but only for top-level calls the lowerer could see; calls
+/// inside proc bodies stayed verbatim and broke at runtime against
+/// a positional-only wrapper proc.
+///
+/// Procs without a parsed signature (parser couldn't extract one
+/// from the args list, e.g. mid-edit syntax error) pass through as
+/// plain Tcl: `proc name {<raw args>} {<body>}`. The
+/// `::vw::kwargs` prelude is only emitted when we know what
+/// parameters to declare.
+fn lower_proc_decl(
+    proc: &Proc,
+    source: &str,
+    table: &SignatureTable<'_>,
+) -> String {
+    lower_proc_decl_with_name(proc, source, table, None)
+}
+
+/// Like [`lower_proc_decl`] but uses `name_override` as the emitted
+/// proc name instead of `proc.name`. Used by the REPL when lowering
+/// an enum-overload specialization under its mangled name
+/// (`__<name>__<Variant>`) — the source name on the parsed proc
+/// is the user-visible public name (`handle_prop`), but the
+/// dispatcher needs the specialization to live under its mangled
+/// alias so the runtime switch can find it.
+///
+/// With `name_override == None` the proc's own name is used; a proc
+/// with neither lowers with an empty name.
+pub fn lower_proc_decl_with_name(
+    proc: &Proc,
+    source: &str,
+    table: &SignatureTable<'_>,
+    name_override: Option<&str>,
+) -> String {
+    let name = name_override.or(proc.name.as_deref()).unwrap_or("");
+    // Re-emit the body by walking its parsed statements rather
+    // than slicing raw text. This is what gives htcl's "newlines
+    // inside `[ … ]` are whitespace" semantics inside proc bodies
+    // too — verbatim slicing leaves Tcl to interpret the
+    // newlines as command separators, which silently splits a
+    // multi-line `set x [ foo \n -a 1 \n -b 2 \n]` into four
+    // separate calls and drops every flag arg.
+    //
+    // Critically, we pad the emitted body with blank lines so
+    // each lowered statement lands on the SAME line it occupied
+    // in the source. Tcl's `info frame` reports body lines
+    // relative to the script text it was given — without padding,
+    // collapsing a 5-line `[ ... ]` to one line shifts every
+    // subsequent statement upward and the stack trace's
+    // "line N in proc X" ends up pointing at unrelated source
+    // lines. With padding, Tcl's body line N == source body
+    // line N == `body_start_file_line + N - 1`, which is what
+    // the REPL's `ProcLocation::resolve_body_line` already
+    // assumes.
+    let line_idx = crate::line_index::LineIndex::new(source);
+    let body_open_line = line_idx.position(proc.body_span.start).line; // 0-based
+    let body = if proc.body.is_empty() {
+        // No parsed statements: fall back to the raw body text.
+        proc.body_span.slice(source).to_string()
+    } else {
+        let mut out = String::new();
+        // First emitted body line corresponds to one line after
+        // the line containing `{`. We track 0-based file lines
+        // throughout.
+        let mut cur_line = body_open_line + 1;
+        for stmt in &proc.body {
+            let Stmt::Command(cmd) = stmt else { continue };
+            let stmt_line = line_idx.position(cmd.span.start).line;
+            // Pad with blank lines until the output line matches
+            // the statement's source line.
+            while cur_line < stmt_line {
+                out.push('\n');
+                cur_line += 1;
+            }
+            let line = lower_command(cmd, source, table);
+            if line.is_empty() {
+                continue;
+            }
+            out.push_str(&line);
+            out.push('\n');
+            // Account for the terminator plus any newlines the
+            // lowered statement itself contains.
+            cur_line += 1 + line.matches('\n').count() as u32;
+        }
+        out
+    };
+    let Some(sig) = proc.signature.as_ref() else {
+        // Couldn't parse a structured signature — emit the proc
+        // verbatim. Tcl will accept it if the raw arg text is
+        // valid Tcl; otherwise the user already has a parse-error
+        // diagnostic from the upstream parser.
+        let args_list = proc.args_span.slice(source);
+        return format!("proc {name} {{{args_list}}} {{{body}}}");
+    };
+    let sig_dict = build_kwargs_sig_dict(sig);
+    // Put `::vw::kwargs` on the SAME line as the opening `{` so
+    // it doesn't eat the first source line of the body and shift
+    // subsequent statements. Tcl treats "the line containing `{`"
+    // as body line 1 — putting the kwargs preamble there means
+    // body line 2 onward maps 1:1 to source lines, matching
+    // what the padding loop above produced.
+    format!(
+        "proc {name} {{args}} {{ ::vw::kwargs $args {{{sig_dict}}}\n{body}}}"
+    )
+}
+
+/// Render the parameter list as a flat `name default name default
+/// ...` Tcl dict for [`::vw::kwargs`] to consume. The default for
+/// an arg without `@default` is the empty string `""` — at which
+/// point the validator has already complained at compile time
+/// about missing required args. Quote each default through
+/// [`AttributeValue::to_tcl_literal`] so integers, idents, and
+/// strings all round-trip correctly.
+fn build_kwargs_sig_dict(sig: &ProcSignature) -> String {
+    let mut out = String::new();
+    for (i, arg) in sig.args.iter().enumerate() {
+        if i > 0 {
+            out.push(' ');
+        }
+        out.push_str(&arg.name);
+        out.push(' ');
+        // Only the first value of `@default(...)` is used.
+        let default = arg
+            .attribute("default")
+            .and_then(|attr| attr.values.first())
+            .map(|v| v.to_tcl_literal())
+            .unwrap_or_else(|| "\"\"".to_string());
+        out.push_str(&default);
+    }
+    out
+}
+
+/// The syntactic prefix that marks a call to a runtime-Tcl proc
+/// (an "extern") rather than an htcl-defined proc. Anywhere in
+/// lowered text, [`rewrite_externs`] turns `extern::name` into
+/// `::name`, the Tcl-absolute name of the underlying proc.
+pub const EXTERN_PREFIX: &str = "extern::";
+
+/// Result of [`rewrite_externs`]: the lowered text with every
+/// `extern::name` reference replaced by its global-namespace form
+/// `::name`, plus the deduplicated, sorted set of external names
+/// that were referenced. `names` may be passed to
+/// [`extern_rename_prelude`], which currently returns the empty
+/// string.
+#[derive(Clone, Debug)]
+pub struct ExternRewrite {
+    pub text: String,
+    pub names: Vec<String>,
+}
+
+/// Rewrite every `extern::<name>` in `text` to `::<name>` — the
+/// Tcl-absolute form that anchors the lookup at the global
+/// namespace. Returns the rewritten text plus the unique, sorted
+/// set of names seen.
+///
+/// Anchoring at `::` matters because htcl wrappers live inside
+/// `namespace eval vivado { … }`. Inside that namespace a bare
+/// `create_project` resolution searches the *current* namespace
+/// first and finds `vivado::create_project` (the wrapper itself!) —
+/// infinite recursion. The leading `::` skips the current-
+/// namespace search and goes straight to the global, where the
+/// unshadowed Vivado native lives.
+///
+/// The rewrite is text-level, not AST-level, so it catches calls
+/// at any nesting depth — inside `[ … ]`, inside multi-arm
+/// `if {…} { … extern::foo … } else { … }`, etc. Word-boundary
+/// detection on the leading side prevents `not_extern::foo` from
+/// triggering; the trailing identifier is parsed greedily so
+/// `extern::a::b::c` rewrites as one unit (→ `::a::b::c`). A bare
+/// `extern::` with no identifier after it is left untouched.
+pub fn rewrite_externs(text: &str) -> ExternRewrite {
+    let mut out = String::with_capacity(text.len());
+    let mut names: std::collections::BTreeSet<String> =
+        std::collections::BTreeSet::new();
+    let bytes = text.as_bytes();
+    let prefix = EXTERN_PREFIX.as_bytes();
+    let mut i = 0;
+    while i < bytes.len() {
+        // A match needs the prefix at `i` and a non-identifier byte
+        // (or start of text) immediately before it.
+        let at_prefix = bytes[i..].starts_with(prefix)
+            && (i == 0 || !is_extern_ident_byte(bytes[i - 1]));
+        if at_prefix {
+            let name_start = i + prefix.len();
+            let name_end = scan_extern_name_end(bytes, name_start);
+            if name_end > name_start {
+                let name = &text[name_start..name_end];
+                // Leading `::` makes the lookup absolute (global
+                // namespace) — necessary inside `namespace eval
+                // vivado { … }` so the wrapper body doesn't
+                // recurse on itself.
+                out.push_str("::");
+                out.push_str(name);
+                names.insert(name.to_string());
+                i = name_end;
+                continue;
+            }
+        }
+        // No rewrite here: copy one whole character and move on.
+        let ch_end = next_char_boundary(text, i);
+        out.push_str(&text[i..ch_end]);
+        i = ch_end;
+    }
+    ExternRewrite {
+        text: out,
+        names: names.into_iter().collect(),
+    }
+}
+
+/// True for bytes allowed in an extern name segment: ASCII letters,
+/// digits, and `_`.
+fn is_extern_ident_byte(b: u8) -> bool {
+    b.is_ascii_alphanumeric() || b == b'_'
+}
+
+/// Return the index one past the end of the extern name starting at
+/// `start`. A name is a run of identifier bytes, optionally joined
+/// by `::` separators; a `::` only counts when an identifier byte
+/// follows it, so a trailing `::` is not consumed.
+fn scan_extern_name_end(bytes: &[u8], start: usize) -> usize {
+    let mut i = start;
+    while i < bytes.len() {
+        if is_extern_ident_byte(bytes[i]) {
+            i += 1;
+        } else if bytes[i] == b':'
+            && bytes.get(i + 1).copied() == Some(b':')
+            && bytes.get(i + 2).copied().is_some_and(is_extern_ident_byte)
+        {
+            i += 2;
+        } else {
+            break;
+        }
+    }
+    i
+}
+
+/// Return the byte index of the first char boundary after `start`,
+/// or `s.len()` when `start` is inside the last character.
+fn next_char_boundary(s: &str, start: usize) -> usize {
+    let mut end = start + 1;
+    while end < s.len() && !s.is_char_boundary(end) {
+        end += 1;
+    }
+    end
+}
+
+/// Historically emitted a rename prelude that aliased each Vivado
+/// native to a mangled name so wrappers could forward to the
+/// underlying proc without recursing on themselves. With wrappers
+/// now living in the `vivado::` namespace and no longer shadowing
+/// the globals they wrap, no rename is needed — `extern::foo`
+/// just rewrites to `::foo`, which Tcl resolves to the global
+/// native. Kept as a public symbol so callers don't have to track
+/// the layering change; ignores its argument and returns the empty
+/// string.
+pub fn extern_rename_prelude(_names: &[String]) -> String {
+    String::new()
+}
+
+/// True when `call_name` is the explicit `extern::…` form — used
+/// by the validator to skip the unknown-call check for these
+/// deliberately-external invocations.
+pub fn is_extern_call(call_name: &str) -> bool {
+    call_name.starts_with(EXTERN_PREFIX)
+}
+
+/// Reconstruct a command's words as lowered Tcl text. Splits the
+/// problem along the AST's natural boundaries so each piece is
+/// handled by the right rules:
+///
+/// - Bare and quoted words are rebuilt part-by-part. Plain text,
+///   `$var` references, and `\x` escapes go through verbatim;
+///   `[ … ]` substitutions recurse into the lowering pipeline so
+///   calls *inside* a `set proj [ create_project … ]` are lowered
+///   by the same rules, and multi-line bracket bodies collapse to
+///   one Tcl statement by construction.
+/// - Braced words are literal text — Tcl never substitutes inside
+///   `{ … }`, so the parser doesn't even surface `CmdSubst` parts
+///   for them; we ship them as raw source.
+fn lower_words(
+    words: &[Word],
+    source: &str,
+    table: &SignatureTable<'_>,
+) -> String {
+    // Preserve source-level adjacency between consecutive words.
+    // The parser splits `{*}$var` into two AST words ({*} as a
+    // braced "*", $var as a bare word) but their source spans
+    // touch — Tcl reads them as the expand-prefix operator.
+    // Joining with a literal space would force `{*} $var`, which
+    // Tcl reinterprets as a literal-`*`-arg followed by `$var`.
+    // Checking adjacency keeps the no-space form for `{*}$var`
+    // while still spacing genuinely-whitespace-separated words.
+    let mut out = String::new();
+    for (i, w) in words.iter().enumerate() {
+        if i > 0 {
+            let prev_end = words[i - 1].span.end;
+            if w.span.start > prev_end {
+                out.push(' ');
+            }
+        }
+        out.push_str(&lower_word(w, source, table));
+    }
+    out
+}
+
+/// Lower a single word according to its form: bare words are
+/// rebuilt from their parts, quoted words are rebuilt and re-wrapped
+/// in `"…"`, and braced words are copied from the source span.
+fn lower_word(word: &Word, source: &str, table: &SignatureTable<'_>) -> String {
+    match word.form {
+        WordForm::Bare => lower_word_parts(&word.parts, source, table),
+        WordForm::Quoted => {
+            let inner = lower_word_parts(&word.parts, source, table);
+            format!("\"{inner}\"")
+        }
+        WordForm::Braced => word.span.slice(source).to_string(),
+    }
+}
+
+/// Rebuild the text of a bare or quoted word from its parts. Text is
+/// copied, variable references regain their `$`, escapes regain
+/// their backslash, and each `[ … ]` body is lowered command by
+/// command and joined with `; ` on a single line.
+fn lower_word_parts(
+    parts: &[WordPart],
+    source: &str,
+    table: &SignatureTable<'_>,
+) -> String {
+    let mut out = String::new();
+    for part in parts {
+        match part {
+            WordPart::Text { value, .. } => out.push_str(value),
+            WordPart::VarRef { name, .. } => {
+                out.push('$');
+                out.push_str(name);
+            }
+            WordPart::Escape { value, .. } => {
+                out.push('\\');
+                out.push(*value);
+            }
+            WordPart::CmdSubst { body, .. } => {
+                // Non-command statements and commands that lower to
+                // nothing are dropped from the substitution.
+                let lowered: Vec<String> = body
+                    .iter()
+                    .filter_map(|s| match s {
+                        Stmt::Command(c) => {
+                            Some(lower_command(c, source, table))
+                        }
+                        _ => None,
+                    })
+                    .filter(|s| !s.trim().is_empty())
+                    .collect();
+                out.push('[');
+                out.push_str(&lowered.join("; "));
+                out.push(']');
+            }
+        }
+    }
+    out
+}
+
+/// Helper retained for symmetry with future analyzers that want to
+/// inspect a word's literal form without re-walking its parts.
+/// Returns `None` as soon as the word contains anything other than
+/// plain text or an escape.
+#[allow(dead_code)]
+fn word_text(word: &Word) -> Option<String> {
+    let mut out = String::new();
+    for part in &word.parts {
+        match part {
+            WordPart::Text { value, .. } => out.push_str(value),
+            WordPart::Escape { value, .. } => out.push(*value),
+            _ => return None,
+        }
+    }
+    Some(out)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parser::parse;
+
+    fn lowered(src: &str) -> Vec<String> {
+        let parsed = parse(src);
+        assert!(parsed.errors.is_empty(), "{:?}", parsed.errors);
+        let table = signature_table(&parsed.document);
+        parsed
+            .document
+            .stmts
+            .iter()
+            .filter_map(|s| match s {
+                Stmt::Command(c) => Some(lower_command(c, src, &table)),
+                _ => None,
+            })
+            .collect()
+    }
+
+    #[test]
+    fn proc_decl_emits_kwargs_prelude() {
+        // Every htcl proc lowers to `proc name {args} { ::vw::kwargs
+        // ... ; body }` — the runtime helper parses the caller's
+        // `-flag value` pairs into local variables matching the
+        // declared param names, with defaults applied where the
+        // caller didn't supply a flag. Body text passes through
+        // unchanged.
+        let src = "proc f {\n @default(0) a\n @default(1) b\n} { puts hi }\n";
+        let out = lowered(src);
+        assert!(
+            out[0].starts_with("proc f {args} {"),
+            "wrong arg-list form: {}",
+            out[0]
+        );
+        assert!(
+            out[0].contains("::vw::kwargs $args {a 0 b 1}"),
+            "missing or wrong kwargs prelude: {}",
+            out[0]
+        );
+        assert!(out[0].contains("puts hi"), "lost body: {}", out[0]);
+    }
+
+    #[test]
+    fn call_with_flags_ships_verbatim() {
+        // No more compile-time keyword→positional rewrite. The call
+        // ships as the user typed it; the wrapper proc parses the
+        // keywords at runtime via its kwargs prelude.
+        let src = "proc f {\n a\n b\n} { puts hi }\nf -b 22 -a 11\n";
+        let out = lowered(src);
+        assert_eq!(out[1], "f -b 22 -a 11");
+    }
+
+    #[test]
+    fn call_with_omitted_arg_ships_verbatim() {
+        // The wrapper's default is wired in at runtime by
+        // ::vw::kwargs; the call site doesn't need to fill it.
+        let src = "proc f {\n @default(7) a\n b\n} { puts hi }\nf -b 22\n";
+        let out = lowered(src);
+        assert_eq!(out[1], "f -b 22");
+    }
+
+    #[test]
+    fn inner_call_inside_brackets_ships_verbatim() {
+        // What this test used to assert (`[make xc foo]` —
+        // keyword→positional rewrite for the inner call) is no
+        // longer the architecture. The inner call ships verbatim;
+        // the wrapper parses `-part`/`-name` at runtime. The only
+        // transformation we still apply is multi-line bracket
+        // flattening.
+        let src = "proc make {
+ @default(\"\") part
+ name
+} { puts ok }
+set proj [
+ make
+ -part xc
+ -name foo
+]
+";
+        let out = lowered(src);
+        assert_eq!(out.len(), 2, "{:?}", out);
+        let set_line = &out[1];
+        // Inner call stays keyword-form: `make -part xc -name foo`.
+        assert!(
+            set_line.contains("[make -part xc -name foo]"),
+            "inner call should ship verbatim; got: {set_line}"
+        );
+        // Multi-line bracket body still collapses to one line.
+        assert!(
+            !set_line.contains('\n'),
+            "expected single line; got: {set_line:?}"
+        );
+    }
+
+    #[test]
+    fn call_inside_proc_body_ships_verbatim() {
+        // Regression guard for the create_bd_design bug: a
+        // keyword-form call to a known wrapper, nested inside
+        // another proc's body, must NOT be rewritten. In the old
+        // architecture the lowerer only saw top-level call sites
+        // and silently failed to translate this one, so at runtime
+        // Tcl handed `-name cips` to a positional-only wrapper
+        // proc and errored "wrong # args". Now the wrapper parses
+        // keywords at runtime, so we just ship the call as-is.
+        let src = "proc create_bd_design { @default(\"\") name } { puts ok }\n\
+                   proc configure_cips {} {\n \
+                   create_bd_design -name cips\n\
+                   }\n";
+        let out = lowered(src);
+        // The configure_cips proc decl is the second statement.
+        // Its body should still contain the keyword-form call —
+        // we don't touch it at compile time.
+        assert!(
+            out[1].contains("create_bd_design -name cips"),
+            "call inside proc body should ship verbatim; got:\n{}",
+            out[1]
+        );
+    }
+
+    #[test]
+    fn proc_with_no_default_emits_empty_string_default() {
+        // An htcl arg without `@default` is implicitly required —
+        // the validator catches a missing-flag call at compile
+        // time. At runtime we still need a placeholder default so
+        // `::vw::kwargs` doesn't blow up when the variable is
+        // referenced before the (missing) `-flag` would have set
+        // it; we use `""` (empty string).
+        let src = "proc f {\n required_arg\n} { puts hi }\n";
+        let out = lowered(src);
+        assert!(
+            out[0].contains("::vw::kwargs $args {required_arg \"\"}"),
+            "wrong default for required arg: {}",
+            out[0]
+        );
+    }
+
+    #[test]
+    fn multiline_bracket_substitution_collapses_to_one_line() {
+        // The exact shape that broke the REPL: an outer call whose
+        // sole arg is a `[ … ]` substitution spanning multiple
+        // source lines. Tcl would parse the bracket body as N
+        // separate commands; we have to flatten the newlines.
+        let src = "set proj [\n create_project\n -in_memory 1\n -name foo\n]\n";
+        let out = lowered(src);
+        assert_eq!(out.len(), 1);
+        // No literal newline inside the brackets after lowering.
+        let between = out[0]
+            .split_once('[')
+            .and_then(|(_, rest)| rest.rsplit_once(']'))
+            .map(|(inner, _)| inner)
+            .unwrap();
+        assert!(!between.contains('\n'), "lowered: {:?}", out[0]);
+        // The full call must still parse as `set proj [ ... ]`.
+        assert!(out[0].starts_with("set proj ["));
+        assert!(out[0].trim_end().ends_with(']'));
+    }
+
+    #[test]
+    fn nested_multiline_brackets_all_collapse() {
+        // `[outer [inner ...] ...]` — newlines inside both layers
+        // become spaces; the parser sees nested CmdSubst so the
+        // recursive collection covers both.
+        let src =
+            "set x [\n foo\n -a [\n bar\n -b 1\n ]\n]\n";
+        let out = lowered(src);
+        assert!(!out[0].contains('\n'), "lowered: {:?}", out[0]);
+    }
+
+    #[test]
+    fn newlines_inside_braced_groups_stay_intact() {
+        // Inside `{ … }` the brackets are literal, not a
+        // substitution. The parser doesn't emit a `CmdSubst` for
+        // them so we must not strip newlines from braced bodies.
+        let src = "proc f {} {\n puts a\n puts b\n}\n";
+        let out = lowered(src);
+        // The proc-decl lowering builds its own output (not the
+        // verbatim path), so it preserves body newlines.
+        assert!(out[0].contains('\n'), "lowered: {:?}", out[0]);
+    }
+
+    #[test]
+    fn rewrite_externs_anchors_at_global_namespace() {
+        let r = rewrite_externs(
+            "set cmd [list extern::set_property]\n\
+             extern::create_project -name foo\n",
+        );
+        // Leading `::` anchors the lookup at Tcl's global
+        // namespace, which is where unshadowed Vivado natives
+        // live — necessary so wrapper bodies inside `namespace
+        // eval vivado { … }` don't recurse on themselves.
+        assert!(r.text.contains("[list ::set_property]"), "{}", r.text);
+        assert!(r.text.contains("::create_project -name foo"), "{}", r.text);
+        assert!(!r.text.contains("extern::"), "{}", r.text);
+        assert_eq!(r.names, vec!["create_project", "set_property"]);
+    }
+
+    #[test]
+    fn rewrite_externs_preserves_namespaced_names() {
+        let r = rewrite_externs("extern::common::send_msg_id A B C\n");
+        // Same anchoring for multi-segment names —
+        // `::common::send_msg_id` resolves the leading namespace
+        // search from the global root.
+        assert!(r.text.contains("::common::send_msg_id A B C"), "{}", r.text);
+        assert_eq!(r.names, vec!["common::send_msg_id"]);
+    }
+
+    #[test]
+    fn rewrite_externs_respects_word_boundary() {
+        let r = rewrite_externs("set x not_extern::foo\n");
+        assert_eq!(r.text, "set x not_extern::foo\n");
+        assert!(r.names.is_empty());
+    }
+
+    #[test]
+    fn extern_rename_prelude_is_empty() {
+        // Wrappers no longer shadow globals (they live in the
+        // `vivado::` namespace), so the historical rename plumbing
+        // is unnecessary. The helper still exists for API stability
+        // but always returns empty.
+        let p = extern_rename_prelude(&["set_property".to_string()]);
+        assert!(p.is_empty(), "{p}");
+    }
+
+    #[test]
+    fn is_extern_call_recognizes_prefix() {
+        assert!(is_extern_call("extern::set_property"));
+        assert!(is_extern_call("extern::common::send_msg_id"));
+        assert!(!is_extern_call("set_property"));
+        assert!(!is_extern_call("not_extern::foo"));
+    }
+
+    #[test]
+    fn unknown_command_passes_through() {
+        let src = "puts \"hello $x\"\n";
+        let out = lowered(src);
+        assert_eq!(out[0], "puts \"hello $x\"");
+    }
+
+    #[test]
+    fn string_default_quotes_correctly_in_kwargs_sig() {
+        // Defaults are stamped into the proc's kwargs-prelude sig
+        // dict, not into the call site. A `@default("hi")` becomes
+        // the literal `"hi"` (quoted) in the dict — `::vw::kwargs`
+        // sets `$greeting` to it when the caller omits the flag.
+        let src = "proc f {\n @default(\"hi\") greeting\n} { puts hi }\n";
+        let out = lowered(src);
+        assert!(
+            out[0].contains("::vw::kwargs $args {greeting \"hi\"}"),
+            "default should appear quoted in the sig dict: {}",
+            out[0]
+        );
+    }
+}
diff --git a/vw-htcl/src/overload.rs b/vw-htcl/src/overload.rs
new file mode 100644
index 0000000..9f5027a
--- /dev/null
+++ b/vw-htcl/src/overload.rs
@@ -0,0 +1,162 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! Codegen for the enum-overload dispatcher.
+//!
+//! When the validator classifies a set of procs sharing a name as
+//! a valid enum-overload (see
+//! [`crate::validate::build_signature_table_with_overloads`]), the
+//! lowerer rewrites each specialization under its mangled name
+//! (`__<name>__<Variant>`) and emits a single public dispatcher
+//! proc that switches on the tagged value's variant tag and calls
+//! the right specialization with the unwrapped payload.
+//!
+//! Runtime model:
+//!
+//! ```tcl
+//! proc handle_prop {v args} {
+//!     switch -- [lindex $v 0] {
+//!         Scalar { return [__handle_prop__Scalar [lindex $v 1] {*}$args] }
+//!         Nested { return [__handle_prop__Nested [lindex $v 1] {*}$args] }
+//!         default { error "handle_prop: unknown variant '[lindex $v 0]'" }
+//!     }
+//! }
+//! ```
+//!
+//! The payload is unwrapped before the specialization runs — the
+//! body of `proc handle_prop {v: Property::Scalar} ...` sees `$v`
+//! as the bare payload (a `string`), matching Haskell `case`
+//! semantics.
+//!
+//! Empty-payload variants still get `[lindex $v 1]` passed through
+//! (it's the empty string for a single-element list) — for those
+//! the specialization's body shouldn't reference `$v` and the
+//! lowering should ideally drop the arg, but for v1 we pass
+//! uniformly for simplicity.
+
+use crate::ast::OverloadInfo;
+
+/// Generate the public-name dispatcher proc for an overload set.
+///
+/// The emitted proc is named `info.public_name` and always has the
+/// signature `{args}` regardless of arity, which keeps the
+/// dispatcher uniform across overload sets with different tail
+/// shapes. At runtime it:
+///
+/// 1. scans `$args` pairwise for `-<dispatch_arg_name> <value>`,
+/// 2. switches on the variant tag, `[lindex <value> 0]`,
+/// 3. rebuilds the kwargs list with the dispatched value replaced
+///    by its payload, `[lindex <value> 1]`, leaving every other
+///    key/value pair untouched, and
+/// 4. calls the variant's `mangled_proc_name` with that list.
+///
+/// A tag that matches no variant (including the empty tag produced
+/// when the dispatch arg is absent) lands in a `default` arm that
+/// raises a Tcl `error`.
+///
+/// Returns the Tcl source of the dispatcher, newline-terminated.
+pub fn emit_dispatcher(info: &OverloadInfo) -> String {
+    let mut out = String::new();
+    // The dispatcher takes the same kwargs envelope every other
+    // proc takes (so calls can pass `-<arg> <value>`). It
+    // extracts the dispatched-arg value from the kwargs args list
+    // and switches on its tag. Specializations receive the
+    // payload via the same kwargs protocol — `-<arg> <payload>`
+    // — so their bodies bind `$<arg>` to the unwrapped payload
+    // naturally.
+    let pub_name = &info.public_name;
+    let arg = &info.dispatch_arg_name;
+    out.push_str(&format!("proc {pub_name} {{args}} {{\n"));
+    // Grab the dispatched arg's value from the kwargs args list.
+    // We look for `-<arg> <value>` pairwise; if the user passes
+    // positional, well, that's not a supported form for overloaded
+    // procs in v1 (kwargs-only).
+    out.push_str(&format!(
+        " set __vw_disp \"\"\n \
+         foreach {{__vw_k __vw_v}} $args {{\n \
+         if {{$__vw_k eq \"-{arg}\"}} {{ set __vw_disp $__vw_v; break }}\n \
+         }}\n"
+    ));
+    out.push_str(" switch -- [lindex $__vw_disp 0] {\n");
+    for v in &info.variants {
+        // Build a new args list with the dispatched-arg's value
+        // replaced by the unwrapped payload, then forward the full
+        // args list (including any tail args we pass through for
+        // future multi-arg overload support) to the specialization.
+        out.push_str(&format!(
+            " {variant} {{\n \
+             set __vw_new [list]\n \
+             foreach {{__vw_k __vw_v}} $args {{\n \
+             if {{$__vw_k eq \"-{arg}\"}} {{\n \
+             lappend __vw_new $__vw_k [lindex $__vw_v 1]\n \
+             }} else {{\n \
+             lappend __vw_new $__vw_k $__vw_v\n \
+             }}\n \
+             }}\n \
+             return [{mangled} {{*}}$__vw_new]\n \
+             }}\n",
+            variant = v.variant_name,
+            mangled = v.mangled_proc_name,
+        ));
+    }
+    // Runtime safety net: a tag that matches no arm is a Tcl error
+    // naming the public proc and the offending tag.
+    out.push_str(&format!(
+        " default {{ error \"{pub_name}: unknown variant '[lindex $__vw_disp 0]'\" }}\n"
+    ));
+    out.push_str(" }\n");
+    out.push_str("}\n");
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ast::{OverloadInfo, OverloadVariant};
+    use crate::span::Span;
+    use crate::validate::mangle_specialization;
+
+    // Build an `OverloadInfo` for `public` with one variant per
+    // entry in `variants`. The dispatch arg is always `v`, the enum
+    // name is a dummy, and all spans are empty — the dispatcher
+    // codegen reads none of those except the arg name.
+    fn info(public: &str, variants: &[&str]) -> OverloadInfo {
+        OverloadInfo {
+            public_name: public.into(),
+            enum_name: "E".into(),
+            dispatch_arg_name: "v".into(),
+            variants: variants
+                .iter()
+                .map(|v| OverloadVariant {
+                    variant_name: (*v).into(),
+                    mangled_proc_name: mangle_specialization(public, v),
+                    dispatch_arg_span: Span::new(0, 0),
+                })
+                .collect(),
+            anchor_span: Span::new(0, 0),
+        }
+    }
+
+    #[test]
+    fn dispatcher_two_arms() {
+        let i = info("handle_prop", &["Scalar", "Nested"]);
+        let d = emit_dispatcher(&i);
+        // Dispatcher takes the standard `{args}` kwargs envelope.
+        assert!(d.contains("proc handle_prop {args}"));
+        // Walks kwargs to find the `-v <value>` pair.
+        assert!(d.contains("if {$__vw_k eq \"-v\"}"));
+        assert!(d.contains("switch -- [lindex $__vw_disp 0]"));
+        // Each arm forwards to its mangled specialization with the
+        // unwrapped payload spliced back into the kwargs list.
+        assert!(d.contains("Scalar {"));
+        assert!(d.contains("__handle_prop__Scalar"));
+        assert!(d.contains("Nested {"));
+        assert!(d.contains("__handle_prop__Nested"));
+        assert!(d.contains("default {"));
+        assert!(d.contains("unknown variant"));
+    }
+
+    #[test]
+    fn dispatcher_single_arm() {
+        // A one-variant overload set still gets a dispatcher with
+        // the `{args}` envelope and a call to the mangled proc.
+        let i = info("only_one", &["Solo"]);
+        let d = emit_dispatcher(&i);
+        assert!(d.contains("proc only_one {args}"));
+        assert!(d.contains("__only_one__Solo"));
+    }
+
+    #[test]
+    fn dispatcher_includes_default_arm() {
+        // Future-proof against runtime corruption / unanticipated
+        // tag values. The validator's exhaustiveness check guards
+        // the source side; the default arm guards the runtime
+        // side.
+        let i = info("foo", &["A", "B"]);
+        let d = emit_dispatcher(&i);
+        assert!(d.contains("default { error"));
+    }
+}
diff --git a/vw-htcl/src/parser.rs b/vw-htcl/src/parser.rs
new file mode 100644
index 0000000..b3b8df7
--- /dev/null
+++ b/vw-htcl/src/parser.rs
@@ -0,0 +1,1547 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! htcl source parser.
+//!
+//! Builds a [`Document`] CST plus a list of [`ParseError`]s. The parser
+//! is error-tolerant: when a command can't be parsed it is recorded as
+//! a [`Stmt::Error`] and the parser resyncs at the next statement
+//! boundary (newline or semicolon). This is what makes the same parser
+//! usable from the LSP, where input is incomplete by definition.
+//!
+//! The outer statement loop is hand-rolled (it owns recovery and
+//! collects doc comments); inner pieces — words, parts, escapes —
+//! drive [`winnow::LocatingSlice`] for position tracking. As the grammar
+//! grows past Phase 0 the inner pieces will lean on winnow combinators
+//! more heavily.
+
+use winnow::stream::{Location, Stream};
+use winnow::LocatingSlice;
+
+use crate::ast::*;
+use crate::span::Span;
+
+/// Cursor type used by every parsing routine in this module: a
+/// `&str` wrapped so the current byte offset can be queried.
+type Input<'i> = LocatingSlice<&'i str>;
+
+/// A diagnostic produced while parsing.
+#[derive(Clone, Debug)]
+pub struct ParseError {
+    /// Human-readable description of the problem.
+    pub message: String,
+    /// Source range the diagnostic applies to.
+    pub span: Span,
+}
+
+/// Result of [`parse`]: the document plus every diagnostic collected
+/// along the way. Parsing never fails outright; problems are reported
+/// through `errors`.
+#[derive(Clone, Debug)]
+pub struct ParseOutput {
+    pub document: Document,
+    pub errors: Vec<ParseError>,
+}
+
+/// Parse a whole htcl source text.
+///
+/// Runs the statement-level parse in [`Mode::Toplevel`] and then the
+/// `populate_procs` post-pass over the resulting statements, which
+/// fills in the nested structure (proc signatures and bodies, `[…]`
+/// interiors, and the other declaration kinds it handles).
+pub fn parse(source: &str) -> ParseOutput {
+    let mut input = LocatingSlice::new(source);
+    let mut errors = Vec::new();
+    let mut document =
+        parse_document(&mut input, source, &mut errors, Mode::Toplevel);
+    populate_procs(&mut document.stmts, source, &mut errors);
+    ParseOutput { document, errors }
+}
+
+/// Statement-termination mode for the parser.
+///
+/// At the top level (and inside proc bodies, which are themselves
+/// scripts) a newline ends a command — the historical Tcl rule. Inside
+/// a `[ … ]` command substitution we relax that: newlines are
+/// whitespace and only `;` (or the closing bracket, which is EOF for
+/// the interior parser) terminates a command. That lets a single call
+/// span lines without `\` continuations, e.g.
+///
+/// ```htcl
+/// set x [
+///     create_cpm5_cpm_pcie0
+///         -cell cpm5
+///         -max_link_speed 32.0_GT/s
+/// ]
+/// ```
+///
+/// Multi-command `[…]` (rare in practice — only the last command's
+/// value flows out) still works via explicit `;`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum Mode {
+    /// Newline and `;` both terminate a command.
+    Toplevel,
+    /// Only `;` terminates a command; newlines are whitespace.
+    BracketBody,
+}
+
+/// Post-pass over every `proc` — top-level *and* nested — that fills
+/// in the structured args [`signature`](crate::ast::Proc::signature)
+/// and parses the proc [`body`](crate::ast::Proc::body) into real
+/// statements.
+///
+/// The body is parsed as a standalone fragment (its braces are
+/// already stripped by [`inner_text_span`]); the resulting spans are
+/// relative to the fragment, so they're shifted by the body's start
+/// offset back into whole-source coordinates before being stored.
+/// After shifting they're absolute, which lets the recursion process
+/// nested procs against the original `source` uniformly.
+fn populate_procs(
+    stmts: &mut [crate::ast::Stmt],
+    source: &str,
+    errors: &mut Vec<ParseError>,
+) {
+    use crate::ast::{CommandKind, Stmt};
+    use crate::proc_args::parse_proc_args;
+    for stmt in stmts.iter_mut() {
+        let Stmt::Command(cmd) = stmt else { continue };
+
+        // Walk every word's parts and parse `[…]` command substitution
+        // interiors into statements. Spans inside the parsed body get
+        // shifted into whole-source coordinates so downstream analyses
+        // can navigate them uniformly with top-level commands.
+        for word in &mut cmd.words {
+            populate_cmd_subst_parts(&mut word.parts, source, errors);
+        }
+
+        match &mut cmd.kind {
+            CommandKind::Proc(proc) => {
+                let (sig, errs) = parse_proc_args(source, proc.args_span);
+                errors.extend(errs);
+                proc.signature = Some(sig);
+
+                // Parse the return-type annotation if the outer
+                // parse recorded one. We have `source` and the
+                // error sink here, so this is the right place to
+                // do it — `classify_command` only recorded the
+                // (inner, brace-stripped) span.
+                if let Some(rt_span) = proc.return_type_span {
+                    let text = rt_span.slice(source);
+                    match crate::type_parse::parse(text, rt_span.start) {
+                        Ok(ty) => proc.return_type = Some(ty),
+                        Err(e) => errors.push(ParseError {
+                            message: e.message,
+                            span: e.span,
+                        }),
+                    }
+                }
+                // Mirror the return type onto the signature so the
+                // signature-table-based lookup paths (REPL repr
+                // formatter, hover) see it without re-walking
+                // back to the Proc node.
+                if let Some(sig) = proc.signature.as_mut() {
+                    sig.return_type = proc.return_type.clone();
+                }
+
+                // Proc bodies are scripts — newlines still terminate
+                // statements there.
+                proc.body = parse_shifted_fragment(
+                    proc.body_span.slice(source),
+                    proc.body_span.start,
+                    Mode::Toplevel,
+                    errors,
+                );
+
+                // Spans are now absolute, so nested procs can be processed
+                // against the same `source`.
+                populate_procs(&mut proc.body, source, errors);
+            }
+            CommandKind::TypeDecl(td) => {
+                let text = td.underlying_span.slice(source);
+                match crate::type_parse::parse(text, td.underlying_span.start) {
+                    Ok(ty) => td.underlying = Some(ty),
+                    Err(e) => errors.push(ParseError {
+                        message: e.message,
+                        span: e.span,
+                    }),
+                }
+            }
+            CommandKind::EnumDecl(ed) => {
+                let text = ed.body_span.slice(source);
+                match crate::enum_parse::parse(text, ed.body_span.start) {
+                    Ok(vs) => ed.variants = vs,
+                    Err(e) => errors.push(ParseError {
+                        message: e.message,
+                        span: e.span,
+                    }),
+                }
+            }
+            CommandKind::NamespaceEval(ns) => {
+                // Same body-recursion as `proc` — the braced body is
+                // a script fragment, parsed in toplevel mode so
+                // newlines terminate statements normally.
+                ns.body = parse_shifted_fragment(
+                    ns.body_span.slice(source),
+                    ns.body_span.start,
+                    Mode::Toplevel,
+                    errors,
+                );
+                populate_procs(&mut ns.body, source, errors);
+            }
+            _ => {}
+        }
+    }
+}
+
+/// Fill in the parsed `body` of every [`WordPart::CmdSubst`] in
+/// `parts`, then recurse into those bodies via [`populate_procs`].
+/// Parts of any other kind are left untouched.
+fn populate_cmd_subst_parts(
+    parts: &mut [WordPart],
+    source: &str,
+    errors: &mut Vec<ParseError>,
+) {
+    for part in parts {
+        let WordPart::CmdSubst {
+            source: text,
+            span,
+            body,
+        } = part
+        else {
+            continue;
+        };
+        // `span` covers the whole `[...]` including the brackets, and
+        // `text` is the interior; the first interior byte sits at
+        // `span.start + 1`.
+        //
+        // Bracket-body mode: newlines are whitespace, so multi-line
+        // calls inside `[ … ]` parse as one command without `\`.
+        *body = parse_shifted_fragment(
+            text,
+            span.start + 1,
+            Mode::BracketBody,
+            errors,
+        );
+        populate_procs(body, source, errors);
+    }
+}
+
+/// Parse a fragment of htcl (e.g. a proc body) into statements. Spans
+/// are relative to `text`; the caller shifts them into whole-source
+/// coordinates.
+fn parse_fragment(
+    text: &str,
+    mode: Mode,
+) -> (Vec<crate::ast::Stmt>, Vec<ParseError>) {
+    let mut input = LocatingSlice::new(text);
+    let mut errors = Vec::new();
+    let document = parse_document(&mut input, text, &mut errors, mode);
+    (document.stmts, errors)
+}
+
+/// Parse `text` with [`parse_fragment`] and move the result into
+/// whole-source coordinates: every statement span and every error
+/// span is shifted by `delta` (the offset of `text`'s first byte in
+/// the full source). Errors are appended to `errors`; the shifted
+/// statements are returned.
+fn parse_shifted_fragment(
+    text: &str,
+    delta: u32,
+    mode: Mode,
+    errors: &mut Vec<ParseError>,
+) -> Vec<crate::ast::Stmt> {
+    let (mut stmts, fragment_errors) = parse_fragment(text, mode);
+    for stmt in &mut stmts {
+        shift_stmt(stmt, delta);
+    }
+    errors.extend(fragment_errors.into_iter().map(|mut err| {
+        err.span = err.span.shifted(delta);
+        err
+    }));
+    stmts
+}
+
+/// Shift every span recorded on `stmt` by `delta`.
+fn shift_stmt(stmt: &mut crate::ast::Stmt, delta: u32) {
+    use crate::ast::Stmt;
+    match stmt {
+        Stmt::Command(cmd) => shift_command(cmd, delta),
+        Stmt::Comment(c) => c.span = c.span.shifted(delta),
+        Stmt::Error(e) => e.span = e.span.shifted(delta),
+    }
+}
+
+/// Shift a command's own span, its words, and the spans stored on
+/// its classified [`CommandKind`] payload by `delta`.
+fn shift_command(cmd: &mut Command, delta: u32) {
+    cmd.span = cmd.span.shifted(delta);
+    for word in &mut cmd.words {
+        shift_word(word, delta);
+    }
+    // At this stage nested procs carry only the spans produced by
+    // `parse_document`; `signature` is still `None` and `body` empty,
+    // both filled later by the caller's `populate_procs` recursion.
+    match &mut cmd.kind {
+        CommandKind::Proc(proc) => {
+            proc.name_span = proc.name_span.shifted(delta);
+            proc.args_span = proc.args_span.shifted(delta);
+            proc.body_span = proc.body_span.shifted(delta);
+            if let Some(ref mut s) = proc.return_type_span {
+                *s = s.shifted(delta);
+            }
+            // `return_type` is None at this point (parsed later in
+            // `populate_procs` using the now-absolute span), so
+            // there's nothing to shift inside it.
+        }
+        CommandKind::NamespaceEval(ns) => {
+            ns.name_span = ns.name_span.shifted(delta);
+            ns.body_span = ns.body_span.shifted(delta);
+        }
+        CommandKind::TypeDecl(td) => {
+            td.name_span = td.name_span.shifted(delta);
+            td.underlying_span = td.underlying_span.shifted(delta);
+        }
+        CommandKind::EnumDecl(ed) => {
+            ed.name_span = ed.name_span.shifted(delta);
+            ed.body_span = ed.body_span.shifted(delta);
+            // Variants are filled later by `populate_procs` using
+            // the now-absolute body_span, so there's nothing to
+            // shift inside them yet.
+        }
+        _ => {}
+    }
+}
+
+/// Shift a word's span and the span of each of its parts by `delta`.
+/// `CmdSubst` bodies are not visited: they are still empty when this
+/// runs and are parsed afterwards against the shifted span.
+fn shift_word(word: &mut Word, delta: u32) {
+    word.span = word.span.shifted(delta);
+    for part in &mut word.parts {
+        let span = match part {
+            WordPart::Text { span, .. }
+            | WordPart::VarRef { span, .. }
+            | WordPart::CmdSubst { span, .. }
+            | WordPart::Escape { span, .. } => span,
+        };
+        *span = span.shifted(delta);
+    }
+}
+
+/// Failure raised by the word/command-level routines and turned into
+/// a [`ParseError`] + [`Stmt::Error`] by `parse_document`.
+#[derive(Clone, Debug)]
+struct InnerError {
+    message: String,
+    // Never read: `parse_document` rebuilds the reported span from the
+    // command start and the cursor position after resync. Recorded
+    // anyway so each failure site states where it was detected.
+    #[allow(dead_code)]
+    span: Span,
+}
+
+/// Statement loop: parse `input` to EOF into a [`Document`].
+///
+/// Comments become [`Stmt::Comment`]; `##` doc comments are also
+/// queued and attached to the next successfully parsed command. A
+/// command that fails to parse is recorded both in `errors` and as a
+/// [`Stmt::Error`], and the cursor is advanced to the next statement
+/// boundary for `mode` before continuing.
+fn parse_document(
+    input: &mut Input<'_>,
+    source: &str,
+    errors: &mut Vec<ParseError>,
+    mode: Mode,
+) -> Document {
+    let start = input.location();
+    let mut stmts = Vec::new();
+    let mut pending_docs: Vec<String> = Vec::new();
+
+    loop {
+        skip_inline_ws(input, source, mode);
+        if at_eof(input, source) {
+            break;
+        }
+        let c = current_char(input, source);
+        // In `BracketBody`, `\n` is whitespace consumed by
+        // `skip_inline_ws`, so it never reaches this match.
+        let is_separator = match mode {
+            Mode::Toplevel => c == '\n' || c == ';',
+            Mode::BracketBody => c == ';',
+        };
+        match c {
+            _ if is_separator => {
+                // Separators never clear `pending_docs`. For v0 we
+                // keep this simple: a doc comment stays attached to
+                // the next command across `;`, newlines and blank
+                // lines, and is only dropped by an intervening
+                // regular comment or a command that fails to parse.
+                advance_char(input);
+            }
+            '#' => {
+                let comment = parse_comment(input, source);
+                if comment.is_doc {
+                    pending_docs.push(comment.text.clone());
+                } else {
+                    pending_docs.clear();
+                }
+                stmts.push(Stmt::Comment(comment));
+            }
+            _ => {
+                let cmd_start = input.location();
+                match parse_command(input, source, mode) {
+                    Ok(mut cmd) => {
+                        cmd.doc_comments = std::mem::take(&mut pending_docs);
+                        stmts.push(Stmt::Command(cmd));
+                    }
+                    Err(err) => {
+                        pending_docs.clear();
+                        // Resync to the next statement boundary. In
+                        // `BracketBody` only `;` breaks; the surrounding
+                        // `]` is EOF for the interior parser.
+                        while !at_eof(input, source) {
+                            let c = current_char(input, source);
+                            let stop = match mode {
+                                Mode::Toplevel => c == '\n' || c == ';',
+                                Mode::BracketBody => c == ';',
+                            };
+                            if stop {
+                                break;
+                            }
+                            advance_char(input);
+                        }
+                        let span = Span::new(
+                            cmd_start as u32,
+                            input.location() as u32,
+                        );
+                        errors.push(ParseError {
+                            message: err.message.clone(),
+                            span,
+                        });
+                        stmts.push(Stmt::Error(ParseFailure {
+                            message: err.message,
+                            span,
+                        }));
+                    }
+                }
+            }
+        }
+    }
+
+    Document {
+        stmts,
+        span: Span::new(start as u32, input.location() as u32),
+    }
+}
+
+/// Parse one `#` comment, cursor on the `#`. Consumes up to (not
+/// including) the terminating newline. A second `#` marks the
+/// comment as a doc comment.
+fn parse_comment(input: &mut Input<'_>, source: &str) -> Comment {
+    let start = input.location();
+    advance_char(input); // leading `#`
+    let mut is_doc = false;
+    if !at_eof(input, source) && current_char(input, source) == '#' {
+        is_doc = true;
+        advance_char(input);
+    }
+    // Leading single space after `#` / `##` is conventionally part of
+    // the marker; trim it so callers see the raw comment text.
+    if !at_eof(input, source) && current_char(input, source) == ' ' {
+        advance_char(input);
+    }
+    let text_start = input.location();
+    while !at_eof(input, source) {
+        let c = current_char(input, source);
+        if c == '\n' {
+            break;
+        }
+        advance_char(input);
+    }
+    let text_end = input.location();
+    Comment {
+        text: source[text_start..text_end].to_string(),
+        span: Span::new(start as u32, text_end as u32),
+        is_doc,
+    }
+}
+
+/// Parse one command: words up to the next terminator for `mode`
+/// (or EOF), classified via [`classify_command`]. The terminator is
+/// left unconsumed. Errors if a word fails to parse or no word was
+/// read.
+fn parse_command(
+    input: &mut Input<'_>,
+    source: &str,
+    mode: Mode,
+) -> Result<Command, InnerError> {
+    let start = input.location();
+    let mut words = Vec::new();
+    loop {
+        skip_inline_ws(input, source, mode);
+        if at_eof(input, source) {
+            break;
+        }
+        let c = current_char(input, source);
+        let terminate = match mode {
+            Mode::Toplevel => c == '\n' || c == ';',
+            // In bracket-body, only `;` terminates a command — `\n`
+            // is whitespace consumed by `skip_inline_ws`.
+            Mode::BracketBody => c == ';',
+        };
+        if terminate {
+            break;
+        }
+        words.push(parse_word(input, source)?);
+    }
+    if words.is_empty() {
+        return Err(InnerError {
+            message: "expected command".into(),
+            span: Span::new(start as u32, input.location() as u32),
+        });
+    }
+    let span = Span::new(start as u32, input.location() as u32);
+    let kind = classify_command(&words);
+    Ok(Command {
+        words,
+        span,
+        kind,
+        doc_comments: Vec::new(),
+    })
+}
+
+/// Classify a command by its first word (and word count). Only spans
+/// are recorded for the structured kinds; their interiors are parsed
+/// later by `populate_procs`. Anything unrecognised is `Generic`.
+fn classify_command(words: &[Word]) -> CommandKind {
+    let Some(first) = words.first() else {
+        return CommandKind::Generic;
+    };
+    match first.as_text() {
+        Some("set") => CommandKind::Set,
+        Some("src") if words.len() == 2 => {
+            let path_word = &words[1];
+            CommandKind::Src(SrcImport {
+                path: path_word.as_text().map(String::from),
+                path_span: path_word.span,
+            })
+        }
+        Some("proc") if words.len() >= 4 => {
+            let name_word = &words[1];
+            let args_word = &words[2];
+            // 5 words = return-type slot present:
+            //     proc NAME { args } TYPE { body }
+            // 4 words = no return type:
+            //     proc NAME { args } { body }
+            // (>5 words is treated as 5+ junk; the body is taken
+            // from words[4] and the rest is silently ignored.
+            // The return-type slot is parsed in `populate_procs`
+            // where we have `source` and an error sink — we only
+            // record the span here.)
+            let (return_type_span, body_word) = if words.len() >= 5 {
+                (Some(inner_text_span(&words[3])), &words[4])
+            } else {
+                (None, &words[3])
+            };
+            CommandKind::Proc(Proc {
+                name: name_word.as_text().map(String::from),
+                name_span: name_word.span,
+                args_span: inner_text_span(args_word),
+                body_span: inner_text_span(body_word),
+                signature: None,
+                return_type: None,
+                return_type_span,
+                body: Vec::new(),
+            })
+        }
+        // `type NAME = UNDERLYING` newtype declaration. If the third
+        // word is a standalone `=` and a fourth word follows, the
+        // fourth word is the underlying type; otherwise the third
+        // word itself is taken (so `type T U` is accepted too).
+        // NOTE: a fully fused `type T=U` is a single word after
+        // whitespace splitting, so it has fewer than 3 words and
+        // falls through to `Generic`; a half-fused `type T =U`
+        // records `=U` as the underlying span. The underlying type
+        // is parsed in `populate_procs`'s second pass (same
+        // rationale as `proc`'s return type).
+        Some("type") if words.len() >= 3 => {
+            let name_word = &words[1];
+            let underlying_word =
+                if words.len() >= 4 && words[2].as_text() == Some("=") {
+                    &words[3]
+                } else {
+                    &words[2]
+                };
+            CommandKind::TypeDecl(crate::ast::TypeDecl {
+                name: name_word.as_text().map(String::from),
+                name_span: name_word.span,
+                underlying: None,
+                underlying_span: inner_text_span(underlying_word),
+            })
+        }
+        // `enum NAME = { …variants… }` sum-type declaration.
+        // Same `=` handling as `type`: a standalone `=` third word
+        // is skipped, otherwise the third word is the body. The
+        // body word is brace-wrapped; its contents (the variant
+        // list) are parsed in `populate_procs`'s second pass, when
+        // we have the source + error sink.
+        Some("enum") if words.len() >= 3 => {
+            let name_word = &words[1];
+            let body_word =
+                if words.len() >= 4 && words[2].as_text() == Some("=") {
+                    &words[3]
+                } else {
+                    &words[2]
+                };
+            CommandKind::EnumDecl(crate::ast::EnumDecl {
+                name: name_word.as_text().map(String::from),
+                name_span: name_word.span,
+                variants: Vec::new(),
+                body_span: inner_text_span(body_word),
+            })
+        }
+        Some("namespace")
+            if words.len() >= 4
+                && words.get(1).and_then(Word::as_text) == Some("eval") =>
+        {
+            let name_word = &words[2];
+            let body_word = &words[3];
+            CommandKind::NamespaceEval(crate::ast::NamespaceEval {
+                name: name_word.as_text().map(String::from),
+                name_span: name_word.span,
+                body_span: inner_text_span(body_word),
+                body: Vec::new(),
+            })
+        }
+        _ => CommandKind::Generic,
+    }
+}
+
+/// For a braced word, return the span of the brace contents (without
+/// the braces themselves). For any other word, return its full span.
+/// Used so Phase 2's structured-proc reparse and the LSP can point at
+/// the parseable interior directly.
+fn inner_text_span(word: &Word) -> Span {
+    if word.form == WordForm::Braced {
+        if let [WordPart::Text { span, .. }] = word.parts.as_slice() {
+            return *span;
+        }
+    }
+    word.span
+}
+
+/// Parse one word, dispatching on its first character: `{` braced,
+/// `"` quoted, anything else bare. The caller guarantees the cursor
+/// is not at EOF or on a terminator.
+fn parse_word(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<Word, InnerError> {
+    let start = input.location();
+    let c = current_char(input, source);
+    let (form, parts) = match c {
+        '{' => parse_braced_word(input, source)?,
+        '"' => parse_quoted_word(input, source)?,
+        _ => parse_bare_word(input, source)?,
+    };
+    let end = input.location();
+    Ok(Word {
+        form,
+        parts,
+        span: Span::new(start as u32, end as u32),
+    })
+}
+
+/// Parse a `{ … }` word, cursor on the opening brace. Braces nest; a
+/// backslash skips the following character so escaped braces don't
+/// count. The interior is stored verbatim as a single `Text` part
+/// whose span excludes the outer braces.
+fn parse_braced_word(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<(WordForm, Vec<WordPart>), InnerError> {
+    let open_cp = input.checkpoint();
+    let open = input.location();
+    advance_char(input); // {
+    let inner_start = input.location();
+    let mut depth = 1usize;
+    while !at_eof(input, source) {
+        let c = current_char(input, source);
+        match c {
+            '\\' => {
+                advance_char(input);
+                if !at_eof(input, source) {
+                    advance_char(input);
+                }
+            }
+            '{' => {
+                depth += 1;
+                advance_char(input);
+            }
+            '}' => {
+                depth -= 1;
+                if depth == 0 {
+                    let inner_end = input.location();
+                    advance_char(input);
+                    let text = source[inner_start..inner_end].to_string();
+                    return Ok((
+                        WordForm::Braced,
+                        vec![WordPart::Text {
+                            value: text,
+                            span: Span::new(
+                                inner_start as u32,
+                                inner_end as u32,
+                            ),
+                        }],
+                    ));
+                }
+                advance_char(input);
+            }
+            _ => advance_char(input),
+        }
+    }
+    // Unterminated: rewind to just past the open brace so the outer
+    // loop's resync can find the next statement boundary instead of
+    // being stuck at EOF.
+    input.reset(&open_cp);
+    advance_char(input);
+    Err(InnerError {
+        message: "unterminated brace group".into(),
+        span: Span::new(open as u32, (open + 1) as u32),
+    })
+}
+
+/// Parse a `" … "` word, cursor on the opening quote. The interior
+/// goes through [`collect_parts`], so substitutions and escapes are
+/// recognised. Unlike the braced case there is no rewind on an
+/// unterminated string: the cursor is left where `collect_parts`
+/// stopped.
+fn parse_quoted_word(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<(WordForm, Vec<WordPart>), InnerError> {
+    let open = input.location();
+    advance_char(input); // "
+    let parts = collect_parts(input, source, Some('"'))?;
+    if at_eof(input, source) || current_char(input, source) != '"' {
+        return Err(InnerError {
+            message: "unterminated string".into(),
+            span: Span::new(open as u32, input.location() as u32),
+        });
+    }
+    advance_char(input); // closing "
+    Ok((WordForm::Quoted, parts))
+}
+
+/// Parse an unquoted word. Errors with "expected word" if no part
+/// could be collected at the cursor.
+fn parse_bare_word(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<(WordForm, Vec<WordPart>), InnerError> {
+    let start = input.location();
+    let parts = collect_parts(input, source, None)?;
+    if parts.is_empty() {
+        return Err(InnerError {
+            message: "expected word".into(),
+            span: Span::new(start as u32, input.location() as u32),
+        });
+    }
+    Ok((WordForm::Bare, parts))
+}
+
+/// Accumulate [`WordPart`]s.
+///
+/// `terminator` controls the stop condition: `Some('"')` for
+/// double-quoted words (stops at the closing quote, newlines are
+/// content), `None` for bare words (stops at whitespace, `;`, `\n`,
+/// EOF).
+fn collect_parts(
+    input: &mut Input<'_>,
+    source: &str,
+    terminator: Option<char>,
+) -> Result<Vec<WordPart>, InnerError> {
+    let mut parts = Vec::new();
+    let mut text_buf = String::new();
+    let mut text_start: Option<u32> = None;
+
+    // Emit the pending literal run (if any) as one `Text` part ending
+    // at `end`. `start` is only ever `Some` once a character has been
+    // buffered, so a taken `start` normally implies a non-empty `buf`.
+    let flush = |parts: &mut Vec<WordPart>,
+                 buf: &mut String,
+                 start: &mut Option<u32>,
+                 end: u32| {
+        if let Some(s) = start.take() {
+            if !buf.is_empty() {
+                parts.push(WordPart::Text {
+                    value: std::mem::take(buf),
+                    span: Span::new(s, end),
+                });
+            }
+        }
+    };
+
+    loop {
+        if at_eof(input, source) {
+            break;
+        }
+        let c = current_char(input, source);
+        if Some(c) == terminator {
+            break;
+        }
+        // Bare words end at whitespace or a statement separator.
+        if terminator.is_none()
+            && matches!(c, ' ' | '\t' | '\r' | '\n' | ';')
+        {
+            break;
+        }
+        match c {
+            '$' => {
+                flush(
+                    &mut parts,
+                    &mut text_buf,
+                    &mut text_start,
+                    input.location() as u32,
+                );
+                parts.push(parse_var_ref(input, source)?);
+            }
+            '[' => {
+                flush(
+                    &mut parts,
+                    &mut text_buf,
+                    &mut text_start,
+                    input.location() as u32,
+                );
+                parts.push(parse_cmd_subst(input, source)?);
+            }
+            '\\' => {
+                flush(
+                    &mut parts,
+                    &mut text_buf,
+                    &mut text_start,
+                    input.location() as u32,
+                );
+                parts.push(parse_escape(input, source)?);
+            }
+            _ => {
+                if text_start.is_none() {
+                    text_start = Some(input.location() as u32);
+                }
+                text_buf.push(c);
+                advance_char(input);
+            }
+        }
+    }
+    flush(
+        &mut parts,
+        &mut text_buf,
+        &mut text_start,
+        input.location() as u32,
+    );
+    Ok(parts)
+}
+
+/// Parse a `$name` or `${name}` reference, cursor on the `$`.
+///
+/// The bare form takes the longest run of alphanumerics, `_` and `:`;
+/// the braced form takes everything up to the first `}`. A `$` that
+/// is followed by some other character yields a `VarRef` with an
+/// empty name rather than an error. Errors only when `$` is the last
+/// character of the input or a `${` is never closed.
+fn parse_var_ref(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<WordPart, InnerError> {
+    let start = input.location();
+    advance_char(input); // $
+    if at_eof(input, source) {
+        return Err(InnerError {
+            message: "expected variable name after `$`".into(),
+            span: Span::new(start as u32, input.location() as u32),
+        });
+    }
+    let mut name = String::new();
+    if current_char(input, source) == '{' {
+        advance_char(input);
+        while !at_eof(input, source) {
+            let c = current_char(input, source);
+            if c == '}' {
+                advance_char(input);
+                return Ok(WordPart::VarRef {
+                    name,
+                    span: Span::new(start as u32, input.location() as u32),
+                });
+            }
+            name.push(c);
+            advance_char(input);
+        }
+        return Err(InnerError {
+            message: "unterminated `${...}`".into(),
+            span: Span::new(start as u32, input.location() as u32),
+        });
+    }
+    while !at_eof(input, source) {
+        let c = current_char(input, source);
+        if c.is_alphanumeric() || c == '_' || c == ':' {
+            name.push(c);
+            advance_char(input);
+        } else {
+            break;
+        }
+    }
+    Ok(WordPart::VarRef {
+        name,
+        span: Span::new(start as u32, input.location() as u32),
+    })
+}
+
+/// Parse a `[ … ]` command substitution, cursor on the `[`.
+///
+/// Only the bracket matching is done here (nesting is counted, a
+/// backslash skips the next character); the interior text is stored
+/// raw in `source` and `body` is left empty for
+/// `populate_cmd_subst_parts` to fill. The part's span includes both
+/// brackets.
+fn parse_cmd_subst(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<WordPart, InnerError> {
+    let start = input.location();
+    advance_char(input); // [
+    let inner_start = input.location();
+    let mut depth = 1usize;
+    while !at_eof(input, source) {
+        let c = current_char(input, source);
+        match c {
+            '\\' => {
+                advance_char(input);
+                if !at_eof(input, source) {
+                    advance_char(input);
+                }
+            }
+            '[' => {
+                depth += 1;
+                advance_char(input);
+            }
+            ']' => {
+                depth -= 1;
+                if depth == 0 {
+                    let inner_end = input.location();
+                    advance_char(input);
+                    let span = Span::new(start as u32, input.location() as u32);
+                    let text = source[inner_start..inner_end].to_string();
+                    return Ok(WordPart::CmdSubst {
+                        source: text,
+                        span,
+                        body: Vec::new(),
+                    });
+                }
+                advance_char(input);
+            }
+            _ => advance_char(input),
+        }
+    }
+    Err(InnerError {
+        message: "unterminated `[...]` command substitution".into(),
+        span: Span::new(start as u32, input.location() as u32),
+    })
+}
+
+/// Parse a backslash escape, cursor on the `\`. Consumes exactly one
+/// character after the backslash. Errors if the backslash is the
+/// last character of the input.
+fn parse_escape(
+    input: &mut Input<'_>,
+    source: &str,
+) -> Result<WordPart, InnerError> {
+    let start = input.location();
+    advance_char(input); // backslash
+    if at_eof(input, source) {
+        return Err(InnerError {
+            message: "trailing `\\` at end of input".into(),
+            span: Span::new(start as u32, input.location() as u32),
+        });
+    }
+    let c = current_char(input, source);
+    advance_char(input);
+    // Only `\n`, `\t` and `\r` translate to a control character;
+    // every other escaped character (`\\`, `\"`, `\[`, `\]`, `\{`,
+    // `\}`, `\$`, …) stands for itself.
+    let value = match c {
+        'n' => '\n',
+        't' => '\t',
+        'r' => '\r',
+        other => other,
+    };
+    Ok(WordPart::Escape {
+        value,
+        span: Span::new(start as u32, input.location() as u32),
+    })
+}
+
+/// Skip whitespace that does not end a statement: spaces, tabs,
+/// `\r`, and backslash-newline continuations in every mode, plus
+/// bare newlines in [`Mode::BracketBody`].
+fn skip_inline_ws(input: &mut Input<'_>, source: &str, mode: Mode) {
+    while !at_eof(input, source) {
+        let c = current_char(input, source);
+        if c == ' ' || c == '\t' || c == '\r' {
+            advance_char(input);
+        } else if mode == Mode::BracketBody && c == '\n' {
+            // Inside `[ … ]` the newline isn't a statement terminator;
+            // it's just whitespace.
+            advance_char(input);
+        } else if c == '\\' {
+            // A backslash is whitespace only as a line continuation,
+            // i.e. when the very next byte is a newline.
+            let pos = input.location();
+            if source.as_bytes().get(pos + 1) == Some(&b'\n') {
+                advance_char(input);
+                advance_char(input);
+            } else {
+                break;
+            }
+        } else {
+            break;
+        }
+    }
+}
+
+/// True once the cursor has reached the end of `source`.
+fn at_eof(input: &Input<'_>, source: &str) -> bool {
+    input.location() >= source.len()
+}
+
+/// Character at the cursor, or `'\0'` at end of input. Callers check
+/// [`at_eof`] first, so the sentinel is not normally observed.
+fn current_char(input: &Input<'_>, source: &str) -> char {
+    source[input.location()..].chars().next().unwrap_or('\0')
+}
+
+/// Advance the cursor by one character (no-op at end of input).
+fn advance_char(input: &mut Input<'_>) {
+    let _ = input.next_token();
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_empty() {
+        let out = parse("");
+        assert!(out.document.stmts.is_empty());
+        assert!(out.errors.is_empty());
+    }
+
+    #[test]
+    fn parse_comment_and_doc() {
+        let src = "# regular\n## doc text\nputs hi\n";
+        let out = parse(src);
+        assert!(out.errors.is_empty(), "{:?}", out.errors);
+        assert_eq!(out.document.stmts.len(), 3);
+        let Stmt::Command(cmd) = &out.document.stmts[2] else {
+            panic!("expected command, got {:?}", out.document.stmts[2]);
+        };
+        assert_eq!(cmd.doc_comments, vec!["doc text".to_string()]);
+        assert_eq!(cmd.words[0].as_text(), Some("puts"));
+        assert_eq!(cmd.words[1].as_text(), Some("hi"));
+    }
+
+    #[test]
+    fn parse_set_command() {
+        let out = parse("set x 42");
+        assert!(out.errors.is_empty());
+        let Stmt::Command(cmd) = &out.document.stmts[0] else {
+            panic!();
+        };
+        assert!(matches!(cmd.kind, CommandKind::Set));
+        assert_eq!(cmd.words.len(), 3);
} + + #[test] + fn parse_proc_braced() { + let src = "proc greet {name} { puts \"hi $name\" }\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!("expected proc, got {:?}", cmd.kind); + }; + assert_eq!(proc.name.as_deref(), Some("greet")); + let args = proc.args_span.slice(src); + let body = proc.body_span.slice(src); + assert_eq!(args, "name"); + assert!(body.contains("puts")); + // No return type slot. + assert!(proc.return_type.is_none()); + assert!(proc.return_type_span.is_none()); + } + + #[test] + fn parse_proc_with_return_type_named() { + let src = "proc f {} string { return foo }\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!() + }; + assert_eq!(proc.name.as_deref(), Some("f")); + let body = proc.body_span.slice(src); + assert!(body.contains("return foo")); + let ty = proc.return_type.as_ref().expect("return type set"); + assert_eq!(ty.name(), "string"); + match ty { + crate::ast::TypeExpr::Named { .. } => {} + _ => panic!("expected Named"), + } + } + + #[test] + fn parse_proc_with_return_type_generic_no_whitespace() { + let src = "proc f {} list { return {} }\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!() + }; + let ty = proc.return_type.as_ref().unwrap(); + let crate::ast::TypeExpr::Generic { name, args, .. 
} = ty else { + panic!("expected Generic") + }; + assert_eq!(name, "list"); + assert_eq!(args.len(), 1); + assert_eq!(args[0].name(), "bd_cell"); + } + + #[test] + fn parse_proc_with_return_type_nested_generic() { + let src = "proc f {} list> { return {} }\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!() + }; + let ty = proc.return_type.as_ref().unwrap(); + let crate::ast::TypeExpr::Generic { args, .. } = ty else { + panic!() + }; + let crate::ast::TypeExpr::Generic { + name: inner_name, + args: inner_args, + .. + } = &args[0] + else { + panic!("expected nested Generic") + }; + assert_eq!(inner_name, "dict"); + assert_eq!(inner_args.len(), 2); + } + + #[test] + fn parse_proc_with_return_type_bracketed_whitespace() { + let src = "proc f {} {dict} { return {} }\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!() + }; + let ty = proc.return_type.as_ref().unwrap(); + let crate::ast::TypeExpr::Generic { name, args, .. 
} = ty else { + panic!() + }; + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + assert_eq!(args[0].name(), "string"); + assert_eq!(args[1].name(), "int"); + } + + #[test] + fn parse_proc_with_invalid_return_type_emits_diagnostic() { + let src = "proc f {} list< { return {} }\n"; + let out = parse(src); + assert!( + !out.errors.is_empty(), + "expected a parse-error diagnostic for bad type" + ); + assert!(out.errors.iter().any(|e| e.message.contains("expected") + || e.message.contains("unterminated"))); + } + + #[test] + fn parse_type_decl_named() { + let src = "type bd_cell = string\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::TypeDecl(td) = &cmd.kind else { + panic!("expected TypeDecl, got {:?}", cmd.kind) + }; + assert_eq!(td.name.as_deref(), Some("bd_cell")); + let underlying = td.underlying.as_ref().unwrap(); + assert_eq!(underlying.name(), "string"); + } + + #[test] + fn parse_type_decl_generic_underlying() { + let src = "type fancy_dict = {dict}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::TypeDecl(td) = &cmd.kind else { + panic!() + }; + let crate::ast::TypeExpr::Generic { name, args, .. } = + td.underlying.as_ref().unwrap() + else { + panic!() + }; + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + } + + #[test] + fn parse_type_decl_without_equals_works() { + // `type T U` (no `=`) is also accepted — the `=` is sugar. 
+ let src = "type widget string\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::TypeDecl(td) = &cmd.kind else { + panic!() + }; + assert_eq!(td.name.as_deref(), Some("widget")); + assert_eq!(td.underlying.as_ref().unwrap().name(), "string"); + } + + #[test] + fn parse_type_decl_with_bad_underlying_emits_diagnostic() { + let src = "type foo = \n"; + let out = parse(src); + assert!( + !out.errors.is_empty(), + "expected diagnostic for malformed underlying type" + ); + } + + // --- enum declarations ----------------------------------------- + + #[test] + fn parse_enum_decl_simple() { + let src = "enum Direction = {\n North\n South\n East\n West\n}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::EnumDecl(ed) = &cmd.kind else { + panic!("expected EnumDecl, got {:?}", cmd.kind); + }; + assert_eq!(ed.name.as_deref(), Some("Direction")); + assert_eq!(ed.variants.len(), 4); + for v in &ed.variants { + assert!(v.payload.is_none(), "expected empty-payload variant"); + } + let names: Vec<&str> = + ed.variants.iter().map(|v| v.name.as_str()).collect(); + assert_eq!(names, vec!["North", "South", "East", "West"]); + } + + #[test] + fn parse_enum_decl_with_payloads() { + let src = "enum Property = {\n Scalar: string\n Nested: dict\n}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::EnumDecl(ed) = &cmd.kind else { + panic!() + }; + assert_eq!(ed.name.as_deref(), Some("Property")); + assert_eq!(ed.variants.len(), 2); + assert_eq!(ed.variants[0].name, "Scalar"); + assert_eq!(ed.variants[0].payload.as_ref().unwrap().name(), "string"); + assert_eq!(ed.variants[1].name, "Nested"); + let crate::ast::TypeExpr::Generic 
{ name, args, .. } = + ed.variants[1].payload.as_ref().unwrap() + else { + panic!(); + }; + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + assert_eq!(args[0].name(), "string"); + assert_eq!(args[1].name(), "Property"); + } + + #[test] + fn parse_enum_decl_mixed_payload_and_empty() { + let src = + "enum Mix = {\n Empty\n WithInt: int\n Other\n WithList: list\n}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::EnumDecl(ed) = &cmd.kind else { + panic!() + }; + assert_eq!(ed.variants.len(), 4); + assert!(ed.variants[0].payload.is_none()); + assert_eq!(ed.variants[1].payload.as_ref().unwrap().name(), "int"); + assert!(ed.variants[2].payload.is_none()); + assert_eq!(ed.variants[3].payload.as_ref().unwrap().name(), "list"); + } + + #[test] + fn parse_enum_decl_without_equals() { + let src = "enum Color {\n Red\n Green\n Blue\n}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::EnumDecl(ed) = &cmd.kind else { + panic!() + }; + assert_eq!(ed.name.as_deref(), Some("Color")); + assert_eq!(ed.variants.len(), 3); + } + + #[test] + fn parse_enum_decl_with_bad_variant_emits_diagnostic() { + // 123Foo is not a valid identifier — should diagnose. + let src = "enum Bad = {\n 123Foo: int\n}\n"; + let out = parse(src); + assert!( + !out.errors.is_empty(), + "expected diagnostic for malformed variant" + ); + } + + #[test] + fn parse_proc_with_qualified_arg_type() { + // The `E::V` qualified syntax for overloaded handler args. 
+ let src = + "proc handle_prop {v: Property::Scalar} string { return $v }\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!() + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!() + }; + let sig = proc.signature.as_ref().unwrap(); + assert_eq!(sig.args.len(), 1); + let arg = &sig.args[0]; + assert_eq!(arg.name, "v"); + let crate::ast::TypeExpr::Qualified { + namespace, variant, .. + } = arg.type_annotation.as_ref().unwrap() + else { + panic!( + "expected Qualified type annotation, got {:?}", + arg.type_annotation + ); + }; + assert_eq!(namespace, "Property"); + assert_eq!(variant, "Scalar"); + } + + #[test] + fn parse_variable_and_subst() { + let src = "puts $x [foo bar]"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + assert_eq!(cmd.words.len(), 3); + let WordPart::VarRef { name, .. } = &cmd.words[1].parts[0] else { + panic!("expected var ref"); + }; + assert_eq!(name, "x"); + let WordPart::CmdSubst { source: src, .. } = &cmd.words[2].parts[0] + else { + panic!("expected cmd subst"); + }; + assert_eq!(src, "foo bar"); + } + + #[test] + fn parse_quoted_with_subst() { + let src = r#"puts "hello $name""#; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + assert_eq!(cmd.words[1].form, WordForm::Quoted); + assert_eq!(cmd.words[1].parts.len(), 2); + let WordPart::Text { value, .. } = &cmd.words[1].parts[0] else { + panic!(); + }; + assert_eq!(value, "hello "); + let WordPart::VarRef { name, .. 
} = &cmd.words[1].parts[1] else { + panic!(); + }; + assert_eq!(name, "name"); + } + + #[test] + fn recovers_from_unterminated_brace() { + let src = "puts {oops\nputs ok\n"; + let out = parse(src); + assert!(!out.errors.is_empty()); + assert!(out.errors[0].message.contains("brace group")); + // After the error we should still see the second `puts ok`. + let ok_cmd = out.document.stmts.iter().find_map(|s| match s { + Stmt::Command(c) + if c.words.first().and_then(|w| w.as_text()) + == Some("puts") + && c.words.get(1).and_then(|w| w.as_text()) + == Some("ok") => + { + Some(c) + } + _ => None, + }); + assert!( + ok_cmd.is_some(), + "expected recovery: {:?}", + out.document.stmts + ); + } + + #[test] + fn proc_body_parses_into_statements_with_absolute_spans() { + let src = "proc outer {\n a\n} {\n inner_call foo\n}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!("expected proc"); + }; + assert_eq!(proc.body.len(), 1, "{:?}", proc.body); + let Stmt::Command(body_cmd) = &proc.body[0] else { + panic!("expected command in body"); + }; + // Span is absolute: it slices back to the original source. 
+ assert_eq!(body_cmd.words[0].span.slice(src), "inner_call"); + assert_eq!( + body_cmd.span.start as usize, + src.find("inner_call").unwrap() + ); + } + + #[test] + fn nested_proc_body_is_parsed_recursively() { + let src = + "proc outer {\n a\n} {\n proc inner {\n b\n} {\n deep\n}\n}\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + let CommandKind::Proc(outer) = &cmd.kind else { + panic!("expected outer proc"); + }; + let Stmt::Command(inner_cmd) = &outer.body[0] else { + panic!("expected inner proc command"); + }; + let CommandKind::Proc(inner) = &inner_cmd.kind else { + panic!("expected inner proc"); + }; + assert_eq!(inner.name.as_deref(), Some("inner")); + // Inner proc got its signature and body populated too. + assert!(inner.signature.is_some()); + let Stmt::Command(deep) = &inner.body[0] else { + panic!("expected deep command"); + }; + assert_eq!(deep.words[0].span.slice(src), "deep"); + } + + #[test] + fn parses_src_statement() { + let src = "src common/log\n"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + let CommandKind::Src(import) = &cmd.kind else { + panic!("expected Src, got {:?}", cmd.kind); + }; + assert_eq!(import.path.as_deref(), Some("common/log")); + assert_eq!(import.path_span.slice(src), "common/log"); + } + + #[test] + fn parses_src_with_named_dep_prefix() { + let out = parse("src @xilinx-ip/cpm5\n"); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + let CommandKind::Src(import) = &cmd.kind else { + panic!("expected Src"); + }; + assert_eq!(import.path.as_deref(), Some("@xilinx-ip/cpm5")); + } + + #[test] + fn src_with_extra_words_is_generic() { + // `src a b` isn't a valid import — it falls back to generic so + // the validator can report it as an unknown command rather than + // the parser silently 
accepting it. + let out = parse("src a b\n"); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + assert!(matches!(cmd.kind, CommandKind::Generic), "{:?}", cmd.kind); + } + + #[test] + fn bracket_body_treats_newlines_as_whitespace() { + // Multi-line call inside `[ … ]` parses as a *single* command, + // no backslash continuations needed. + let src = "\ +set cell [ + create_cpm5_cpm_pcie0 + -cell cpm5 + -max_link_speed 32.0_GT/s +] +"; + let out = parse(src); + assert!(out.errors.is_empty(), "{:?}", out.errors); + let Stmt::Command(set_cmd) = &out.document.stmts[0] else { + panic!(); + }; + assert!(matches!(set_cmd.kind, CommandKind::Set)); + // The `set`'s value word is the cmd-subst; its body should be + // a single command with five words. + let WordPart::CmdSubst { body, .. } = &set_cmd.words[2].parts[0] else { + panic!("expected CmdSubst"); + }; + assert_eq!(body.len(), 1, "{body:#?}"); + let Stmt::Command(inner) = &body[0] else { + panic!(); + }; + let word_texts: Vec<&str> = + inner.words.iter().filter_map(|w| w.as_text()).collect(); + assert_eq!( + word_texts, + vec![ + "create_cpm5_cpm_pcie0", + "-cell", + "cpm5", + "-max_link_speed", + "32.0_GT/s", + ] + ); + } + + #[test] + fn bracket_body_still_separates_on_semicolon() { + // Explicit `;` keeps the multi-command form available inside + // brackets for users who want it. + let src = "set x [a 1 ; b 2]\n"; + let out = parse(src); + assert!(out.errors.is_empty()); + let Stmt::Command(set_cmd) = &out.document.stmts[0] else { + panic!(); + }; + let WordPart::CmdSubst { body, .. } = &set_cmd.words[2].parts[0] else { + panic!(); + }; + assert_eq!(body.len(), 2, "{body:#?}"); + } + + #[test] + fn toplevel_newlines_still_terminate() { + // The bracket-body relaxation does not leak to the top level. 
+ let src = "puts a\nputs b\n"; + let out = parse(src); + let cmds: Vec<&Command> = out + .document + .stmts + .iter() + .filter_map(|s| { + if let Stmt::Command(c) = s { + Some(c) + } else { + None + } + }) + .collect(); + assert_eq!(cmds.len(), 2); + } + + #[test] + fn proc_body_newlines_still_terminate() { + // Proc bodies are scripts; the relaxation is bracket-only. + let src = "proc f {} {\n puts a\n puts b\n}\n"; + let out = parse(src); + let Stmt::Command(cmd) = &out.document.stmts[0] else { + panic!(); + }; + let CommandKind::Proc(proc) = &cmd.kind else { + panic!(); + }; + assert_eq!(proc.body.len(), 2); + } + + #[test] + fn semicolon_separates_commands() { + let src = "set a 1; set b 2"; + let out = parse(src); + assert!(out.errors.is_empty()); + let cmds: Vec<&Command> = out + .document + .stmts + .iter() + .filter_map(|s| { + if let Stmt::Command(c) = s { + Some(c) + } else { + None + } + }) + .collect(); + assert_eq!(cmds.len(), 2); + } +} diff --git a/vw-htcl/src/proc_args.rs b/vw-htcl/src/proc_args.rs new file mode 100644 index 0000000..37b5a43 --- /dev/null +++ b/vw-htcl/src/proc_args.rs @@ -0,0 +1,588 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Parser for the structured proc-arg grammar (Phase 2). +//! +//! Operates on the inner contents of a `proc`'s args braces — the +//! span passed in must point at the text between the braces, not +//! including the braces themselves. All spans on the returned AST +//! nodes are absolute file offsets, so they slot directly into the +//! main document's diagnostics without rebasing. +//! +//! Grammar: +//! +//! ```text +//! args := arg_item* +//! arg_item := doc_comment* attribute* IDENT ( ':' TYPE )? +//! attribute := '@' IDENT ( '(' value ( ',' value )* ')' )? +//! value := integer | string | ident +//! 
TYPE := IDENT ( '<' TYPE ( ',' TYPE )* '>' )? +//! ``` +//! +//! The optional `: TYPE` slot turns an arg from an opaque +//! identifier into a typed one. Adoption is gradual — existing +//! libraries without annotations still parse identically. +//! +//! Whitespace, blank lines, and non-doc comments are skippable +//! between items. + +use crate::ast::{Attribute, AttributeValue, ProcArg, ProcSignature}; +use crate::parser::ParseError; +use crate::span::Span; + +pub fn parse_proc_args( + full_source: &str, + args_span: Span, +) -> (ProcSignature, Vec) { + let inner = args_span.slice(full_source); + let mut state = State { + inner, + base: args_span.start, + pos: 0, + errors: Vec::new(), + }; + let mut args = Vec::new(); + state.parse_args(&mut args); + let State { errors, .. } = state; + ( + ProcSignature { + args, + span: args_span, + // Filled in later by the parser's `populate_procs` + // pass once the return-type annotation has been parsed. + return_type: None, + }, + errors, + ) +} + +struct State<'a> { + inner: &'a str, + /// Absolute file offset where `inner` starts. + base: u32, + /// Byte offset into `inner`. 
+ pos: usize, + errors: Vec, +} + +impl<'a> State<'a> { + fn at_eof(&self) -> bool { + self.pos >= self.inner.len() + } + + fn current(&self) -> char { + self.inner[self.pos..].chars().next().unwrap_or('\0') + } + + fn peek_at(&self, offset: usize) -> char { + let target = self.pos + offset; + if target >= self.inner.len() { + '\0' + } else { + self.inner[target..].chars().next().unwrap_or('\0') + } + } + + fn abs(&self) -> u32 { + self.base + self.pos as u32 + } + + fn bump(&mut self) { + if let Some(c) = self.inner[self.pos..].chars().next() { + self.pos += c.len_utf8(); + } + } + + fn skip_horizontal_ws(&mut self) { + while !self.at_eof() { + let c = self.current(); + if c == ' ' || c == '\t' || c == '\r' { + self.bump(); + } else { + break; + } + } + } + + /// Consume blank lines, comments, and whitespace; doc comments + /// (`##`) are collected and returned so they can attach to the + /// next arg item. + fn skip_separators(&mut self) -> Vec { + let mut docs = Vec::new(); + loop { + // Whitespace including newlines + while !self.at_eof() { + let c = self.current(); + if c.is_whitespace() { + self.bump(); + } else { + break; + } + } + if self.at_eof() { + break; + } + if self.current() == '#' { + let is_doc = self.peek_at(1) == '#'; + self.bump(); + if is_doc { + self.bump(); + } + if !self.at_eof() && self.current() == ' ' { + self.bump(); + } + let text_start = self.pos; + while !self.at_eof() && self.current() != '\n' { + self.bump(); + } + let text = self.inner[text_start..self.pos].to_string(); + if is_doc { + docs.push(text); + } + continue; + } + break; + } + docs + } + + fn parse_args(&mut self, out: &mut Vec) { + loop { + let docs = self.skip_separators(); + if self.at_eof() { + if !docs.is_empty() { + // Doc comments with nothing to attach to. Warn so + // the user knows they're unused. 
+ self.errors.push(ParseError { + message: "doc comment with no following argument" + .into(), + span: Span::new(self.abs(), self.abs()), + }); + } + break; + } + let item_start = self.abs(); + let mut attributes = Vec::new(); + // Attributes can be interleaved with whitespace and + // doc comments themselves can't appear between attrs, + // only at the head — that's the convention from the + // project plan ("doc comments first, then attributes in + // any order, then the argument name"). + while !self.at_eof() && self.current() == '@' { + if let Some(attr) = self.parse_attribute() { + attributes.push(attr); + } + self.skip_horizontal_ws(); + // Allow newlines between attributes. + while !self.at_eof() && self.current() == '\n' { + self.bump(); + self.skip_horizontal_ws(); + } + } + // Identifier. + self.skip_horizontal_ws(); + if self.at_eof() { + self.errors.push(ParseError { + message: "expected argument name".into(), + span: Span::new(item_start, self.abs()), + }); + break; + } + let name_start = self.abs(); + let name = self.consume_ident(); + if name.is_empty() { + let c = self.current(); + self.errors.push(ParseError { + message: format!("expected argument name, found {c}"), + span: Span::new(self.abs(), self.abs() + 1), + }); + // Resync: drop whatever non-whitespace junk is here + // so we can try the next item. + while !self.at_eof() && !self.current().is_whitespace() { + self.bump(); + } + continue; + } + let name_span = Span::new(name_start, self.abs()); + // Optional `: TYPE` annotation. Tcl strings allow `:` + // in bare words, but we're in the structured-args + // sub-grammar — distinct rules apply here. A `:` + // immediately after the arg name (with optional + // horizontal whitespace) opens the annotation slot. + self.skip_horizontal_ws(); + let type_annotation = if !self.at_eof() && self.current() == ':' { + self.bump(); // ':' + self.skip_horizontal_ws(); + let ty_start = self.abs(); + // Consume up to whitespace or end of arg. 
The type + // mini-parser handles its own internal grammar + // (idents, '<', ',', '>'); we just need to slice + // the right text out of the source. + while !self.at_eof() { + let c = self.current(); + if c.is_whitespace() || c == '#' { + break; + } + self.bump(); + } + let ty_end = self.abs(); + let text = &self.inner[(ty_start - self.base) as usize + ..(ty_end - self.base) as usize]; + match crate::type_parse::parse(text, ty_start) { + Ok(ty) => Some(ty), + Err(e) => { + self.errors.push(ParseError { + message: e.message, + span: e.span, + }); + None + } + } + } else { + None + }; + let span = Span::new(item_start, self.abs()); + out.push(ProcArg { + name, + name_span, + doc_comments: docs, + attributes, + type_annotation, + span, + }); + } + } + + fn parse_attribute(&mut self) -> Option { + let start = self.abs(); + self.bump(); // '@' + let name_start = self.abs(); + let name = self.consume_ident(); + if name.is_empty() { + self.errors.push(ParseError { + message: "expected attribute name after @".into(), + span: Span::new(start, self.abs()), + }); + return None; + } + let name_span = Span::new(name_start, self.abs()); + let mut values = Vec::new(); + if !self.at_eof() && self.current() == '(' { + self.bump(); + loop { + self.skip_horizontal_ws(); + // Allow newlines inside the value list + while !self.at_eof() && self.current() == '\n' { + self.bump(); + self.skip_horizontal_ws(); + } + if self.at_eof() { + self.errors.push(ParseError { + message: "unterminated attribute argument list".into(), + span: Span::new(start, self.abs()), + }); + break; + } + if self.current() == ')' { + self.bump(); + break; + } + match self.parse_value() { + Some(v) => values.push(v), + None => { + // Drop characters up to `,` or `)` to resync. 
+ while !self.at_eof() { + let c = self.current(); + if c == ',' || c == ')' || c == '\n' { + break; + } + self.bump(); + } + } + } + self.skip_horizontal_ws(); + while !self.at_eof() && self.current() == '\n' { + self.bump(); + self.skip_horizontal_ws(); + } + if self.at_eof() { + continue; + } + if self.current() == ',' { + self.bump(); + } + } + } + Some(Attribute { + name, + name_span, + values, + span: Span::new(start, self.abs()), + }) + } + + fn parse_value(&mut self) -> Option { + let start = self.abs(); + let c = self.current(); + if c == '"' { + self.bump(); + let text_start = self.pos; + let mut buf = String::new(); + while !self.at_eof() && self.current() != '"' { + if self.current() == '\\' { + self.bump(); + if !self.at_eof() { + buf.push(self.current()); + self.bump(); + } + } else { + buf.push(self.current()); + self.bump(); + } + } + let _ = text_start; + if self.at_eof() { + self.errors.push(ParseError { + message: "unterminated string".into(), + span: Span::new(start, self.abs()), + }); + } else { + self.bump(); // closing " + } + Some(AttributeValue::String { + value: buf, + span: Span::new(start, self.abs()), + }) + } else if c == '-' || c.is_ascii_digit() { + let mut buf = String::new(); + if c == '-' { + buf.push('-'); + self.bump(); + } + while !self.at_eof() && self.current().is_ascii_digit() { + buf.push(self.current()); + self.bump(); + } + match buf.parse::() { + Ok(value) => Some(AttributeValue::Integer { + value, + span: Span::new(start, self.abs()), + }), + Err(_) => { + self.errors.push(ParseError { + message: format!("invalid integer: {buf}"), + span: Span::new(start, self.abs()), + }); + None + } + } + } else if is_ident_start(c) { + let value = self.consume_ident(); + Some(AttributeValue::Ident { + value, + span: Span::new(start, self.abs()), + }) + } else { + self.errors.push(ParseError { + message: format!("expected attribute value, found {c}"), + span: Span::new(start, self.abs() + 1), + }); + None + } + } + + fn 
/// Whether `c` may begin an identifier: an underscore or any
/// alphabetic character.
fn is_ident_start(c: char) -> bool {
    matches!(c, '_') || c.is_alphabetic()
}

/// Whether `c` may appear after the first character of an identifier:
/// an underscore or any alphanumeric character.
fn is_ident_continue(c: char) -> bool {
    matches!(c, '_') || c.is_alphanumeric()
}
} = + a.type_annotation.as_ref().unwrap() + else { + panic!() + }; + assert_eq!(name, "list"); + assert_eq!(args[0].name(), "bd_cell"); + } + + #[test] + fn typed_and_untyped_args_mix() { + let (sig, errs) = parse("a b: string c"); + assert!(errs.is_empty(), "{:?}", errs); + assert_eq!(sig.args.len(), 3); + assert!(sig.args[0].type_annotation.is_none()); + assert_eq!( + sig.args[1].type_annotation.as_ref().unwrap().name(), + "string" + ); + assert!(sig.args[2].type_annotation.is_none()); + } + + #[test] + fn arg_with_attrs_and_type() { + let (sig, errs) = parse("@default(0) count: int"); + assert!(errs.is_empty(), "{:?}", errs); + let a = &sig.args[0]; + assert_eq!(a.name, "count"); + assert_eq!(a.attributes.len(), 1); + assert_eq!(a.attributes[0].name, "default"); + assert_eq!(a.type_annotation.as_ref().unwrap().name(), "int"); + } + + #[test] + fn arg_with_invalid_type_emits_diagnostic() { + let (_sig, errs) = parse("v: ::repr` in +//! the running Tcl interpreter: +//! +//! - For **primitives** (`string`, `int`, `bool`, `unit`), `::repr` +//! is shipped once at session start via [`emit_primitive_prelude`]. +//! - For **user-declared newtypes** (`bd_cell`, `widget`, …), `::repr` +//! is the user's own proc — the validator enforces it exists (see +//! [`crate::validate::build_type_decl_table`]). +//! - For **generics** (`list`, `dict`, nested combinations), +//! [`emit_repr`] monomorphizes a per-instantiation `::repr` +//! that delegates to its element / key / value reprs. Each unique +//! nested instantiation gets its own proc. +//! +//! All emission goes through [`vw_quote::quote_tcl!`] so word +//! quoting is handled automatically rather than via `format!` string +//! concatenation. +//! +//! Mangling: dot-free, separator `_`. `dict` → +//! `dict_string_int`; `list>` → +//! `list_dict_string_bd_cell`. The mangled string is used as the +//! namespace of the emitted proc — `dict_string_int::repr`. This +//! 
corner-collides only when a user declares `type X` whose name +//! happens to equal a mangled compiler-generated namespace (e.g. +//! `type dict_string_int`); pathological in practice. + +use std::collections::{HashMap, HashSet}; + +use vw_quote::quote_tcl; + +use crate::ast::{EnumDecl, EnumVariant, TypeDecl, TypeExpr}; + +/// Output of [`emit_repr`]: the per-type Tcl procs to ship (in +/// dependency order) and the dispatch name to invoke after they're +/// in scope. +#[derive(Clone, Debug)] +pub struct ReprEmission { + /// Tcl proc declarations to ship to the worker before any + /// expression that needs them. Each entry is a complete + /// `proc ::repr { v } { … }` source. + pub procs: Vec, + /// Fully-qualified Tcl proc to invoke: `::repr`. + pub dispatch: String, +} + +/// Mangled namespace name for `ty`. The compiler-emitted repr proc +/// for this type lives at `::repr`. +pub fn mangle(ty: &TypeExpr) -> String { + match ty { + TypeExpr::Named { name, .. } => name.clone(), + TypeExpr::Generic { name, args, .. } => { + let mut out = String::with_capacity(name.len() + args.len() * 8); + out.push_str(name); + for arg in args { + out.push('_'); + out.push_str(&mangle(arg)); + } + out + } + TypeExpr::Qualified { + namespace, variant, .. + } => { + // Qualified types (`Enum::Variant`) are only legal as + // the dispatch-arg annotation on an overloaded handler + // — the validator rejects them anywhere else, so + // codegen should never see one at a value position. If + // we hit this it's a validator bug. + panic!( + "internal error: TypeExpr::Qualified `{namespace}::{variant}` \ + reached codegen at a value position — validator should have \ + rejected this" + ); + } + } +} + +/// Fully-qualified Tcl name of `ty`'s repr proc — what a caller +/// invokes on a value to format it. 
+pub fn dispatch_name(ty: &TypeExpr) -> String { + format!("{}::repr", mangle(ty)) +} + +/// Fully-qualified Tcl name of `ty`'s `to_raw` proc — the +/// boundary-lowering helper that flattens a typed htcl value +/// down to the bare-Tcl form Vivado consumes through `extern::`. +/// Used by [`emit_to_raw_arm`] and by wrappers that explicitly +/// invoke a type's lowering on a typed arg before forwarding to +/// `extern::`. +pub fn to_raw_dispatch_name(ty: &TypeExpr) -> String { + format!("{}::to_raw", mangle(ty)) +} + +/// Fully-qualified Tcl name of `ty`'s `from_raw` proc — the +/// boundary-lifting helper that wraps a raw extern-returned +/// value into the typed form htcl downstream consumes. +pub fn from_raw_dispatch_name(ty: &TypeExpr) -> String { + format!("{}::from_raw", mangle(ty)) +} + +/// Whether `name` is a primitive type the compiler ships repr for. +/// Anything else is either a user-declared newtype (whose triplet is +/// validated separately) or a generic instantiation (whose repr is +/// emitted by [`emit_repr`]). +pub fn is_primitive(name: &str) -> bool { + matches!(name, "string" | "int" | "bool" | "unit") +} + +/// Emit the primitive prelude — Tcl source for the +/// `string` / `int` / `bool` / `unit` triplets (`repr` + `from` + +/// `to`). Shipped once at session start so every typed expression +/// downstream can rely on the primitives being defined. +/// +/// Each type's procs are wrapped in an explicit `namespace eval` +/// block. `string` is a Tcl built-in command, so the otherwise- +/// implicit `proc string::repr` namespace-creation hits a +/// "unknown namespace" error from the interpreter; wrapping in +/// `namespace eval string {...}` sidesteps that (we're operating +/// on the namespace as a Tcl namespace, not as a command class). +/// The same wrapping is applied uniformly to `int` / `bool` / +/// `unit` for consistency and so a future Tcl that promotes +/// `bool` or `int` to a built-in doesn't silently break us. 
+/// +/// `from` / `to` for primitives are identity (or coerce to the +/// canonical representation, e.g. `expr {int(...)}` for `int`). +pub fn emit_primitive_prelude() -> Vec { + // Compiler-emitted reprs share the same kwargs envelope as + // user-written newtype reprs (`proc ::repr {v: T} string + // { … }` lowers to `proc repr {args} { ::vw::kwargs $args + // {v ""}; … }`). The dispatch site (see + // `vw-repl::lower::wrap_with_repr`) always calls them with + // `-v ` so the kwargs envelope binds `$v` uniformly. + // Without this uniformity, user-written reprs (which can't + // avoid the kwargs wrap) would error on positional calls. + vec![ + // string: identity at every slot — including to_raw / from_raw, + // since the Tcl runtime representation of a string IS the raw + // value the extern boundary expects. + quote_tcl!( + "namespace eval string {\n \ + proc repr {args} { ::vw::kwargs $args {v \"\"}; return $v }\n \ + proc from {args} { ::vw::kwargs $args {v \"\"}; return $v }\n \ + proc to {args} { ::vw::kwargs $args {v \"\"}; return $v }\n \ + proc to_raw {args} { ::vw::kwargs $args {v \"\"}; return $v }\n \ + proc from_raw {args} { ::vw::kwargs $args {v \"\"}; return $v }\n\ + }\n" + ), + // int: format / coerce. to_raw / from_raw mirror to / from + // since Vivado consumes integer-shaped strings. + quote_tcl!( + "namespace eval int {\n \ + proc repr {args} { ::vw::kwargs $args {v \"\"}; return [format %d $v] }\n \ + proc from {args} { ::vw::kwargs $args {v \"\"}; return [expr {int($v)}] }\n \ + proc to {args} { ::vw::kwargs $args {v \"\"}; return [expr {int($v)}] }\n \ + proc to_raw {args} { ::vw::kwargs $args {v \"\"}; return [expr {int($v)}] }\n \ + proc from_raw {args} { ::vw::kwargs $args {v \"\"}; return [expr {int($v)}] }\n\ + }\n" + ), + // bool: textual form; 0/1 round-trip for from/to. to_raw / + // from_raw use the same 0/1 form Vivado expects. 
+ quote_tcl!( + "namespace eval bool {\n \ + proc repr {args} { ::vw::kwargs $args {v \"\"}; return [expr {$v ? \"true\" : \"false\"}] }\n \ + proc from {args} { ::vw::kwargs $args {v \"\"}; return [expr {$v ? 1 : 0}] }\n \ + proc to {args} { ::vw::kwargs $args {v \"\"}; return [expr {$v ? 1 : 0}] }\n \ + proc to_raw {args} { ::vw::kwargs $args {v \"\"}; return [expr {$v ? 1 : 0}] }\n \ + proc from_raw {args} { ::vw::kwargs $args {v \"\"}; return [expr {$v ? 1 : 0}] }\n\ + }\n" + ), + // unit: empty value. The App suppresses on the *type*, not + // on the value — these procs exist so generics over `unit` + // still type-check, even though they're unusual. + quote_tcl!( + "namespace eval unit {\n \ + proc repr {args} { ::vw::kwargs $args {v \"\"}; return \"\" }\n \ + proc from {args} { ::vw::kwargs $args {v \"\"}; return \"\" }\n \ + proc to {args} { ::vw::kwargs $args {v \"\"}; return \"\" }\n \ + proc to_raw {args} { ::vw::kwargs $args {v \"\"}; return \"\" }\n \ + proc from_raw {args} { ::vw::kwargs $args {v \"\"}; return \"\" }\n\ + }\n" + ), + ] +} + +/// Walk `ty` depth-first, emitting one `::repr` proc per +/// unique generic instantiation along the way. Plain [`Named`] types +/// (primitives or user newtypes) don't get codegen here — their +/// reprs come from [`emit_primitive_prelude`] or from the user's +/// own `::repr` proc (validator-enforced). +/// +/// The returned dispatch name is `::repr` — the caller +/// invokes it on a value of type `ty` to get the display string. +pub fn emit_repr(ty: &TypeExpr) -> ReprEmission { + emit_repr_with_types(ty, &HashMap::new()) +} + +/// Same as [`emit_repr`] but also walks user-declared newtypes +/// (`type T = U`) — when the dispatch type is a newtype whose +/// underlying is a generic, the generic's repr needs to be in +/// scope so the user's `proc T::repr` body can call it. 
+/// +/// Without this recursion, `Properties::repr` (which delegates to +/// `dict_string_Property::repr`) errors at runtime with +/// `invalid command name "dict_string_Property::repr"` because +/// the monomorphized generic was never emitted. +pub fn emit_repr_with_types( + ty: &TypeExpr, + types: &HashMap, +) -> ReprEmission { + let mut procs = Vec::new(); + let mut seen = HashSet::new(); + emit_recursive(ty, &mut procs, &mut seen, types); + ReprEmission { + procs, + dispatch: dispatch_name(ty), + } +} + +/// Emit the auto-generated `namespace eval { … }` prelude +/// for an enum declaration. Contains: +/// +/// - **Constructors** — one per variant. Payload variants take a +/// `v` arg and return `[list $v]`; empty-payload +/// variants take no args and return `[list ]`. +/// - **`tag` / `payload`** — explicit unwrap accessors wrappers +/// use to bridge enum values into bare-Tcl `extern::` calls. +/// - **`repr`** — switches on `[lindex $v 0]`, calls each variant +/// payload type's `repr` and wraps as `()` for +/// payload variants, bare `` for empty ones. +/// - **`from` / `to`** — identity (enum values are already in their +/// canonical tagged-tuple form; the triplet exists so generics +/// over enums type-check uniformly with newtypes). +/// +/// The block is wrapped in `namespace eval` — Tcl auto-creates +/// the namespace on `proc ::` ONLY when nothing else +/// claims the name. For defensiveness (and so users can pick +/// enum names that happen to match a Tcl built-in's namespace +/// later without a confusing failure mode), we use the explicit +/// form, mirroring the primitive prelude. +pub fn emit_enum_prelude(enum_decl: &EnumDecl) -> String { + let Some(name) = enum_decl.name.as_deref() else { + // Anonymous enum — shouldn't happen post-parser, but + // bail rather than emit junk. 
+ return String::new(); + }; + let mut body = String::new(); + body.push_str(&format!("namespace eval {name} {{\n")); + // Constructors — plain positional Tcl, called by user code as + // `Property::Scalar foo` (positional). NOT through the kwargs + // envelope. + for v in &enum_decl.variants { + emit_constructor(&mut body, &v.name, v.payload.is_some()); + } + // tag / payload — also positional; called by wrappers in + // `extern::` bridging code as `Property::payload $v`. + body.push_str(" proc tag {v} { return [lindex $v 0] }\n"); + body.push_str(" proc payload {v} { return [lindex $v 1] }\n"); + // repr / from / to — kwargs envelope so they're callable + // uniformly with all other reprs (the dispatch site emits + // `-v ` form universally; see + // `vw-repl::lower::wrap_with_repr`). + body.push_str(" proc repr {args} {\n"); + body.push_str(" ::vw::kwargs $args {v \"\"}\n"); + body.push_str(" switch -- [lindex $v 0] {\n"); + for v in &enum_decl.variants { + emit_repr_arm(&mut body, v); + } + body.push_str(" default { return \"\" }\n"); + body.push_str(" }\n"); + body.push_str(" }\n"); + // from / to are identity for enums (the constructors are the + // user-facing lift). + body.push_str( + " proc from {args} { ::vw::kwargs $args {v \"\"}; return $v }\n", + ); + body.push_str( + " proc to {args} { ::vw::kwargs $args {v \"\"}; return $v }\n", + ); + // to_raw: lower the tagged enum value to its raw extern-side + // representation. Switch on variant tag; for payload variants + // recurse via the payload type's to_raw; for empty variants + // emit the variant name (the convention extern Vivado calls + // recognize for tag-style values). See [docs/htcl-extern-boundary.md] + // for the rationale on this being mechanical / compiler-emitted. 
+ body.push_str(" proc to_raw {args} {\n"); + body.push_str(" ::vw::kwargs $args {v \"\"}\n"); + body.push_str(" switch -- [lindex $v 0] {\n"); + for v in &enum_decl.variants { + emit_to_raw_arm(&mut body, v); + } + body.push_str( + " default { error \"unknown variant: [lindex $v 0]\" }\n", + ); + body.push_str(" }\n"); + body.push_str(" }\n"); + // from_raw: default lift wraps the raw value as the FIRST + // variant. For sum types where the right variant depends on + // the value's shape (e.g. Property — Scalar vs Nested + // chosen by structural inference), users override via + // `proc ::from_raw` AFTER the compiler-emitted prelude; + // Tcl's last-`proc`-wins lets the user override take + // precedence. + if let Some(first) = enum_decl.variants.first() { + emit_from_raw_default(&mut body, first); + } else { + body.push_str( + " proc from_raw {args} { ::vw::kwargs $args {v \"\"}; return \"\" }\n", + ); + } + body.push_str("}\n"); + body +} + +/// Emit one arm of `::to_raw`'s `switch -- [lindex $v 0]` +/// body: for payload variants, recurse via the payload type's +/// `to_raw`; for empty variants, emit the variant name as the +/// raw value (matches how extern Vivado callers receive bare +/// enum-style tags). +fn emit_to_raw_arm(out: &mut String, v: &EnumVariant) { + let variant = &v.name; + match &v.payload { + None => { + out.push_str(&format!( + " {variant} {{ return \"{variant}\" }}\n" + )); + } + Some(payload_ty) => { + let dispatch = to_raw_dispatch_name(payload_ty); + out.push_str(&format!( + " {variant} {{ return [{dispatch} -v [lindex $v 1]] }}\n" + )); + } + } +} + +/// Default `::from_raw` body — wrap input as the first +/// variant. For payload variants, the input flows through the +/// payload type's `from_raw` first. For empty variants, the +/// input is ignored and we return the bare-variant constructor. 
+fn emit_from_raw_default(out: &mut String, first: &EnumVariant) { + let variant = &first.name; + match &first.payload { + None => { + out.push_str(&format!( + " proc from_raw {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + return [list {variant}]\n \ + }}\n", + )); + } + Some(payload_ty) => { + let dispatch = from_raw_dispatch_name(payload_ty); + out.push_str(&format!( + " proc from_raw {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + return [list {variant} [{dispatch} -v $v]]\n \ + }}\n", + )); + } + } +} + +fn emit_constructor(out: &mut String, variant: &str, has_payload: bool) { + if has_payload { + out.push_str(&format!( + " proc {variant} {{v}} {{ return [list {variant} $v] }}\n" + )); + } else { + out.push_str(&format!( + " proc {variant} {{}} {{ return [list {variant}] }}\n" + )); + } +} + +fn emit_repr_arm(out: &mut String, v: &EnumVariant) { + let variant = &v.name; + match &v.payload { + None => { + // Empty-payload: just the bare variant name. + out.push_str(&format!( + " {variant} {{ return \"{variant}\" }}\n" + )); + } + Some(payload_ty) => { + // Payload variant: `()`. Formatting + // depends on whether the inner repr fits on one line: + // + // single-line: `Variant(inner)` + // multi-line: `Variant(\n line1\n line2\n)` + // + // The multi-line shape (opening paren followed by + // newline + 2-space indent for the first child, + // closing paren on its own line, every inner line + // indented one extra level) keeps deeply-nested + // values readable instead of arrowing off the right + // margin. + // + // 2-space indent applies to ALL inner lines + // (including their pre-existing continuation indents), + // so each nesting level adds exactly 2 spaces of + // indent uniformly. 
+ let dispatch = dispatch_name(payload_ty); + out.push_str(&format!( + " {variant} {{\n \ + set __vw_inner [{dispatch} -v [lindex $v 1]]\n \ + if {{[string first \"\\n\" $__vw_inner] >= 0}} {{\n \ + set __vw_indented [string map [list \\n \"\\n \"] $__vw_inner]\n \ + return \"{variant}(\\n $__vw_indented\\n)\"\n \ + }} else {{\n \ + return \"{variant}($__vw_inner)\"\n \ + }}\n \ + }}\n" + )); + } + } +} + +fn emit_recursive( + ty: &TypeExpr, + out: &mut Vec, + seen: &mut HashSet, + types: &HashMap, +) { + match ty { + TypeExpr::Named { name, .. } => { + // No codegen for plain names directly — `::repr` + // is either a primitive (shipped via + // `emit_primitive_prelude`) or a user newtype + // (validator-enforced to exist). BUT if `name` + // resolves to a user newtype whose underlying is a + // generic, we have to recurse so the underlying's + // monomorphized repr is shipped — the user's + // `proc ::repr` body typically delegates to it. + if let Some(decl) = types.get(name.as_str()) { + if let Some(underlying) = decl.underlying.as_ref() { + emit_recursive(underlying, out, seen, types); + } + } + } + TypeExpr::Generic { name, args, .. } => { + // Depth-first: emit each arg's repr first so this + // proc's body can call them. + for a in args { + emit_recursive(a, out, seen, types); + } + let m = mangle(ty); + if !seen.insert(m.clone()) { + return; // Already emitted this instantiation. + } + let body = match name.as_str() { + "dict" if args.len() == 2 => { + emit_dict_repr(&m, &args[0], &args[1]) + } + "list" if args.len() == 1 => emit_list_repr(&m, &args[0]), + _ => emit_unknown_generic_repr(&m), + }; + out.push(body); + } + TypeExpr::Qualified { + namespace, variant, .. + } => { + // Mirror of `mangle`'s guard — Qualified types must + // not reach codegen at a value position. 
+ panic!( + "internal error: TypeExpr::Qualified `{namespace}::{variant}` \ + reached emit_recursive — validator should have rejected this" + ); + } + } +} + +/// `dict::repr` — iterate pairs, format each as +/// ` ` joined with newlines. +/// +/// The body uses braced `expr {…}` and avoids interpolating the +/// dispatch names raw via `quote_tcl!` because Tcl's word quoting +/// would brace the `::` separators (those are bare-safe but the +/// macro's `Word::lit` doesn't know that). The proc names go in via +/// raw substitution at template time instead — they're already +/// valid Tcl, and the macro template's literal regions pass through +/// untouched. +fn emit_dict_repr(mangled: &str, k: &TypeExpr, v: &TypeExpr) -> String { + let key_repr = dispatch_name(k); + let val_repr = dispatch_name(v); + let key_to_raw = to_raw_dispatch_name(k); + let val_to_raw = to_raw_dispatch_name(v); + let key_from_raw = from_raw_dispatch_name(k); + let val_from_raw = from_raw_dispatch_name(v); + // Uses the same kwargs envelope as `emit_primitive_prelude` + // so the dispatch site can uniformly call all reprs with + // `-v `. Sub-element reprs are invoked through the + // same `-v` convention. + // + // to_raw / from_raw are emitted in the SAME namespace so + // callers can dispatch via `::{repr,to_raw,from_raw}` + // uniformly. to_raw walks the dict, applying K::to_raw and + // V::to_raw element-wise and rebuilding as a flat paired + // list (the shape Vivado consumes). from_raw is the inverse + // — walks a paired list, applies K::from_raw / V::from_raw + // element-wise, builds a typed dict. 
+ format!( + "namespace eval {ns} {{\n \ + proc repr {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + set out \"\"\n \ + set first 1\n \ + foreach {{k val}} $v {{\n \ + if {{!$first}} {{ append out \"\\n\" }}\n \ + set first 0\n \ + append out [{kr} -v $k] \" \" [{vr} -v $val]\n \ + }}\n \ + return $out\n \ + }}\n \ + proc to_raw {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + set out [list]\n \ + foreach {{k val}} $v {{\n \ + lappend out [{ktr} -v $k] [{vtr} -v $val]\n \ + }}\n \ + return $out\n \ + }}\n \ + proc from_raw {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + set out [dict create]\n \ + foreach {{k val}} $v {{\n \ + dict set out [{kfr} -v $k] [{vfr} -v $val]\n \ + }}\n \ + return $out\n \ + }}\n\ + }}\n", + ns = mangled, + kr = key_repr, + vr = val_repr, + ktr = key_to_raw, + vtr = val_to_raw, + kfr = key_from_raw, + vfr = val_from_raw, + ) +} + +/// `list::repr` — iterate elements, format each via `T::repr`, +/// join with newlines. Also emits `to_raw` / `from_raw` element- +/// wise dispatching through `T::to_raw` / `T::from_raw`. 
+fn emit_list_repr(mangled: &str, elem: &TypeExpr) -> String { + let elem_repr = dispatch_name(elem); + let elem_to_raw = to_raw_dispatch_name(elem); + let elem_from_raw = from_raw_dispatch_name(elem); + format!( + "namespace eval {ns} {{\n \ + proc repr {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + set out \"\"\n \ + set first 1\n \ + foreach item $v {{\n \ + if {{!$first}} {{ append out \"\\n\" }}\n \ + set first 0\n \ + append out [{er} -v $item]\n \ + }}\n \ + return $out\n \ + }}\n \ + proc to_raw {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + set out [list]\n \ + foreach item $v {{\n \ + lappend out [{etr} -v $item]\n \ + }}\n \ + return $out\n \ + }}\n \ + proc from_raw {{args}} {{\n \ + ::vw::kwargs $args {{v \"\"}}\n \ + set out [list]\n \ + foreach item $v {{\n \ + lappend out [{efr} -v $item]\n \ + }}\n \ + return $out\n \ + }}\n\ + }}\n", + ns = mangled, + er = elem_repr, + etr = elem_to_raw, + efr = elem_from_raw, + ) +} + +/// Fallback for generic shapes we don't have a specialized shell +/// for (e.g. a hypothetical `tuple<…>` we haven't designed yet). +/// Renders the raw Tcl value — at least the user sees *something* +/// instead of an "unknown generic" error. 
+fn emit_unknown_generic_repr(mangled: &str) -> String { + format!( + "namespace eval {mangled} {{ \ + proc repr {{args}} {{ ::vw::kwargs $args {{v \"\"}}; return $v }} \ + proc to_raw {{args}} {{ ::vw::kwargs $args {{v \"\"}}; return $v }} \ + proc from_raw {{args}} {{ ::vw::kwargs $args {{v \"\"}}; return $v }} \ + }}\n" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::span::Span; + + fn named(name: &str) -> TypeExpr { + TypeExpr::Named { + name: name.into(), + span: Span::new(0, 0), + } + } + + fn generic(name: &str, args: Vec) -> TypeExpr { + TypeExpr::Generic { + name: name.into(), + name_span: Span::new(0, 0), + args, + span: Span::new(0, 0), + } + } + + #[test] + fn mangle_primitives() { + assert_eq!(mangle(&named("string")), "string"); + assert_eq!(mangle(&named("bd_cell")), "bd_cell"); + assert_eq!(mangle(&named("unit")), "unit"); + } + + #[test] + fn mangle_dict_two_args() { + let ty = generic("dict", vec![named("string"), named("int")]); + assert_eq!(mangle(&ty), "dict_string_int"); + } + + #[test] + fn mangle_list_one_arg() { + let ty = generic("list", vec![named("bd_cell")]); + assert_eq!(mangle(&ty), "list_bd_cell"); + } + + #[test] + fn mangle_nested() { + let inner = generic("dict", vec![named("string"), named("bd_cell")]); + let outer = generic("list", vec![inner]); + assert_eq!(mangle(&outer), "list_dict_string_bd_cell"); + } + + #[test] + fn dispatch_for_primitive_uses_name() { + assert_eq!(dispatch_name(&named("string")), "string::repr"); + assert_eq!(dispatch_name(&named("bd_cell")), "bd_cell::repr"); + } + + #[test] + fn dispatch_for_generic_uses_mangled() { + let ty = generic("dict", vec![named("string"), named("string")]); + assert_eq!(dispatch_name(&ty), "dict_string_string::repr"); + } + + #[test] + fn primitive_prelude_emits_one_namespace_block_per_type() { + let procs = emit_primitive_prelude(); + // 4 types, each emitted as a single `namespace eval` + // block that internally defines repr/from/to. 
+ assert_eq!(procs.len(), 4); + assert!(procs.iter().any(|p| p.contains("namespace eval string"))); + assert!(procs.iter().any(|p| p.contains("namespace eval int"))); + assert!(procs.iter().any(|p| p.contains("namespace eval bool"))); + assert!(procs.iter().any(|p| p.contains("namespace eval unit"))); + // Each block contains the full triplet (repr + from + to) + // and uses `return` in every body. + for p in &procs { + assert!(p.contains("proc repr"), "missing repr in: {p}"); + assert!(p.contains("proc from"), "missing from in: {p}"); + assert!(p.contains("proc to"), "missing to in: {p}"); + } + } + + #[test] + fn emit_repr_named_emits_no_procs() { + // Primitives and user newtypes don't need codegen — repr + // lives in the primitive prelude or the user's own proc. + let e = emit_repr(&named("string")); + assert!(e.procs.is_empty()); + assert_eq!(e.dispatch, "string::repr"); + + let e = emit_repr(&named("bd_cell")); + assert!(e.procs.is_empty()); + assert_eq!(e.dispatch, "bd_cell::repr"); + } + + #[test] + fn emit_repr_dict_string_string() { + let ty = generic("dict", vec![named("string"), named("string")]); + let e = emit_repr(&ty); + assert_eq!(e.dispatch, "dict_string_string::repr"); + assert_eq!(e.procs.len(), 1); + let body = &e.procs[0]; + // The proc is defined inside its `namespace eval`, so the + // textual proc name is just `repr` — the namespace is in + // the surrounding `namespace eval dict_string_string`. + assert!(body.contains("namespace eval dict_string_string")); + assert!(body.contains("proc repr {args}")); + assert!(body.contains("::vw::kwargs $args")); + assert!(body.contains("foreach {k val} $v")); + // Element reprs called by their fully-qualified name via + // the universal `-v ` kwargs form. 
+ assert!(body.contains("[string::repr -v $k]")); + assert!(body.contains("[string::repr -v $val]")); + } + + #[test] + fn emit_repr_list_bd_cell() { + let ty = generic("list", vec![named("bd_cell")]); + let e = emit_repr(&ty); + assert_eq!(e.dispatch, "list_bd_cell::repr"); + assert_eq!(e.procs.len(), 1); + let body = &e.procs[0]; + assert!(body.contains("namespace eval list_bd_cell")); + assert!(body.contains("proc repr {args}")); + assert!(body.contains("[bd_cell::repr -v $item]")); + } + + #[test] + fn emit_repr_nested_topologically_orders_sub_procs() { + // dict>: emits list::repr first, + // then dict_string_list_int::repr. + let inner = generic("list", vec![named("int")]); + let outer = generic("dict", vec![named("string"), inner]); + let e = emit_repr(&outer); + assert_eq!(e.dispatch, "dict_string_list_int::repr"); + assert_eq!(e.procs.len(), 2); + // First proc emitted is the inner list, second is the + // outer dict. + assert!(e.procs[0].contains("namespace eval list_int")); + assert!(e.procs[1].contains("namespace eval dict_string_list_int")); + // Outer body calls the inner by its fully-qualified name. + assert!(e.procs[1].contains("[list_int::repr")); + } + + #[test] + fn emit_repr_dedups_repeated_subtypes() { + // dict — bd_cell is a leaf (Named), so no + // codegen for it, but if we had dict, list> + // we'd want list_int::repr emitted only ONCE. + let inner = generic("list", vec![named("int")]); + let outer = generic("dict", vec![inner.clone(), inner]); + let e = emit_repr(&outer); + // list_int's namespace block appears once even though it's + // referenced twice in the outer dict. 
+ let list_int_count = e + .procs + .iter() + .filter(|p| p.contains("namespace eval list_int ")) + .count(); + assert_eq!(list_int_count, 1); + } + + #[test] + fn emit_repr_unknown_generic_falls_back_to_identity() { + let ty = generic("tuple", vec![named("string"), named("int")]); + let e = emit_repr(&ty); + assert_eq!(e.procs.len(), 1); + assert!( + e.procs[0].contains("return $v"), + "expected identity body, got {:?}", + e.procs[0] + ); + } + + #[test] + fn is_primitive_table() { + assert!(is_primitive("string")); + assert!(is_primitive("int")); + assert!(is_primitive("bool")); + assert!(is_primitive("unit")); + assert!(!is_primitive("bd_cell")); + assert!(!is_primitive("widget")); + assert!(!is_primitive("dict")); + } + + // --- enum prelude emission -------------------------------------- + + fn ed_with_variants( + name: &str, + vs: Vec<(&str, Option)>, + ) -> EnumDecl { + EnumDecl { + name: Some(name.into()), + name_span: Span::new(0, 0), + variants: vs + .into_iter() + .map(|(n, p)| EnumVariant { + name: n.into(), + name_span: Span::new(0, 0), + payload: p, + payload_span: Span::new(0, 0), + span: Span::new(0, 0), + }) + .collect(), + body_span: Span::new(0, 0), + } + } + + #[test] + fn enum_prelude_with_payload_variants() { + let ed = ed_with_variants( + "Property", + vec![ + ("Scalar", Some(named("string"))), + ( + "Nested", + Some(generic( + "dict", + vec![named("string"), named("string")], + )), + ), + ], + ); + let p = emit_enum_prelude(&ed); + // Wrapped in namespace eval. + assert!(p.contains("namespace eval Property")); + // Constructors with payload. + assert!(p.contains("proc Scalar {v} { return [list Scalar $v] }")); + assert!(p.contains("proc Nested {v} { return [list Nested $v] }")); + // Accessors. + assert!(p.contains("proc tag {v}")); + assert!(p.contains("proc payload {v}")); + // Repr switch — kwargs envelope around the body. 
+ assert!(p.contains("proc repr {args}")); + assert!(p.contains("::vw::kwargs $args")); + assert!(p.contains("switch -- [lindex $v 0]")); + // Each variant's body now uses an intermediate + // `__vw_inner` after applying the continuation-indent + // `string map` transform. + assert!(p.contains("Scalar($__vw_inner)")); + assert!(p.contains("Nested($__vw_inner)")); + // Payload reprs dispatched via mangled names with `-v`. + assert!(p.contains("string::repr -v")); + assert!(p.contains("dict_string_string::repr -v")); + // Identity from/to — also kwargs envelope. + assert!(p.contains("proc from {args}")); + assert!(p.contains("proc to {args}")); + } + + #[test] + fn enum_prelude_with_empty_payload_variants() { + let ed = ed_with_variants( + "Direction", + vec![ + ("North", None), + ("South", None), + ("East", None), + ("West", None), + ], + ); + let p = emit_enum_prelude(&ed); + // Empty-payload constructors take no args. + assert!(p.contains("proc North {} { return [list North] }")); + assert!(p.contains("proc West {} { return [list West] }")); + // Repr arms render bare variant name (no parens). + assert!(p.contains("North { return \"North\" }")); + assert!(p.contains("West { return \"West\" }")); + // No `(` after variant names in the repr arms. + let arm = "North { return \"North("; + assert!(!p.contains(arm), "shouldn't have parens for empty variants"); + } + + #[test] + fn enum_prelude_mixed_payload_and_empty() { + let ed = ed_with_variants( + "Maybe", + vec![("Some", Some(named("int"))), ("None", None)], + ); + let p = emit_enum_prelude(&ed); + assert!(p.contains("proc Some {v} { return [list Some $v] }")); + assert!(p.contains("proc None {} { return [list None] }")); + // Payload arm uses `__vw_inner` after the + // continuation-indent `string map` transform. 
+ assert!(p.contains("int::repr -v")); + assert!(p.contains("Some($__vw_inner)")); + assert!(p.contains("None { return \"None\" }")); + } +} diff --git a/vw-htcl/src/scope.rs b/vw-htcl/src/scope.rs new file mode 100644 index 0000000..24f74ac --- /dev/null +++ b/vw-htcl/src/scope.rs @@ -0,0 +1,208 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Variable scope resolution shared by goto and hover. +//! +//! Tcl variables are local to a proc (its parameters plus whatever it +//! `set`s / `variable`s); top-level code shares the global scope. That +//! lexical model is enough to point a `$name` reference at its +//! definition. +//! +//! Two entry styles: +//! +//! - [`resolve_var_def`] resolves a name given a known scope — used by +//! the structured path when the reference is a real [`WordPart::VarRef`]. +//! - [`scan_var_ref`] + [`innermost_scope`] recover a reference the +//! structured parser left buried in opaque text (a command +//! substitution body, or an `if`/`while` condition), by reading the +//! raw source at the cursor and locating the enclosing proc by span. + +use crate::ast::{Command, CommandKind, Document, Proc, ProcArg, Stmt}; +use crate::span::Span; + +/// What a `$name` reference resolves to. +#[derive(Clone, Copy, Debug)] +pub enum VarDef<'a> { + /// A parameter of the enclosing proc. + Param(&'a ProcArg), + /// A local established by `set name ...` or `variable name ...`. + /// Carries the span of the defined name. + Local(Span), +} + +impl VarDef<'_> { + /// The span to navigate to / anchor hover on. + pub fn def_span(&self) -> Span { + match self { + VarDef::Param(arg) => arg.name_span, + VarDef::Local(span) => *span, + } + } +} + +/// Resolve `name` within `scope_stmts` (the statements of the current +/// scope), falling back to a parameter of `enclosing`. 
`offset` biases +/// local resolution toward the last definition at or before the +/// reference (the value in effect there). +pub fn resolve_var_def<'a>( + name: &str, + scope_stmts: &'a [Stmt], + enclosing: Option<&'a Proc>, + offset: u32, +) -> Option> { + let mut best: Option = None; + let mut first: Option = None; + for stmt in scope_stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let Some(def) = local_def_target(cmd, name) else { + continue; + }; + first.get_or_insert(def); + if def.start <= offset { + best = Some(def); + } + } + if let Some(span) = best.or(first) { + return Some(VarDef::Local(span)); + } + + let sig = enclosing?.signature.as_ref()?; + sig.args.iter().find(|a| a.name == name).map(VarDef::Param) +} + +/// If `cmd` defines variable `name` (`set name ...` or `variable +/// name ...`), return the span of the defined name. +fn local_def_target(cmd: &Command, name: &str) -> Option { + match &cmd.kind { + CommandKind::Set => { + let target = cmd.words.get(1)?; + (target.as_text()? == name).then_some(target.span) + } + CommandKind::Generic => { + if cmd.words.first()?.as_text()? != "variable" { + return None; + } + let target = cmd.words.get(1)?; + (target.as_text()? == name).then_some(target.span) + } + CommandKind::Proc(_) + | CommandKind::Src(_) + | CommandKind::NamespaceEval(_) + | CommandKind::TypeDecl(_) + | CommandKind::EnumDecl(_) => None, + } +} + +/// The innermost proc whose body contains `offset`, together with that +/// body's statements. `(document.stmts, None)` when `offset` is at the +/// top level. 
+pub fn innermost_scope( + document: &Document, + offset: u32, +) -> (&[Stmt], Option<&Proc>) { + fn helper(stmts: &[Stmt], offset: u32) -> Option<(&[Stmt], &Proc)> { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let CommandKind::Proc(proc) = &cmd.kind else { + continue; + }; + if proc.body_span.contains(offset) { + return Some( + helper(&proc.body, offset).unwrap_or((&proc.body, proc)), + ); + } + } + None + } + match helper(&document.stmts, offset) { + Some((stmts, proc)) => (stmts, Some(proc)), + None => (&document.stmts, None), + } +} + +/// If the cursor at `offset` sits on a `$name` (or `${name}`) +/// reference — even one the structured parser left inside opaque text +/// (a command substitution, or an expr condition) — return its name +/// and the span of the whole reference. +pub fn scan_var_ref(source: &str, offset: u32) -> Option<(String, Span)> { + let bytes = source.as_bytes(); + let len = bytes.len(); + let off = (offset as usize).min(len); + let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_' || b == b':'; + + // If the cursor sits on the `$` itself, step into the name. 
+ let probe = if off < len && bytes[off] == b'$' { + off + 1 + } else { + off + }; + let probe = probe.min(len); + + let mut start = probe; + while start > 0 && is_ident(bytes[start - 1]) { + start -= 1; + } + let mut end = probe; + while end < len && is_ident(bytes[end]) { + end += 1; + } + if end <= start { + return None; + } + + // `$name` + if start > 0 && bytes[start - 1] == b'$' { + let name = source.get(start..end)?.to_string(); + return Some((name, Span::new((start - 1) as u32, end as u32))); + } + // `${name}` + if start >= 2 + && bytes[start - 1] == b'{' + && bytes[start - 2] == b'$' + && end < len + && bytes[end] == b'}' + { + let name = source.get(start..end)?.to_string(); + return Some((name, Span::new((start - 2) as u32, (end + 1) as u32))); + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn scan_finds_bare_var_from_within() { + let src = "puts $kind here"; + // cursor on the `i` of `$kind` + let pos = (src.find("kind").unwrap() + 1) as u32; + let (name, span) = scan_var_ref(src, pos).unwrap(); + assert_eq!(name, "kind"); + assert_eq!(span.slice(src), "$kind"); + } + + #[test] + fn scan_finds_var_on_dollar() { + let src = "x $y"; + let pos = src.find('$').unwrap() as u32; + let (name, _) = scan_var_ref(src, pos).unwrap(); + assert_eq!(name, "y"); + } + + #[test] + fn scan_finds_braced_var() { + let src = "a ${foo} b"; + let pos = (src.find("foo").unwrap() + 1) as u32; + let (name, span) = scan_var_ref(src, pos).unwrap(); + assert_eq!(name, "foo"); + assert_eq!(span.slice(src), "${foo}"); + } + + #[test] + fn scan_returns_none_off_a_var() { + let src = "plain text"; + assert!(scan_var_ref(src, 2).is_none()); + } +} diff --git a/vw-htcl/src/signature_help.rs b/vw-htcl/src/signature_help.rs new file mode 100644 index 0000000..1929262 --- /dev/null +++ b/vw-htcl/src/signature_help.rs @@ -0,0 +1,159 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Signature help for htcl proc calls. +//! +//! When the cursor is inside a call to a known `proc`, report that +//! proc's signature and which parameter is "active" so the editor can +//! highlight it. The active parameter is the one named by the most +//! recent `-flag` typed on the line; before any flag is typed there is +//! no active parameter (the whole signature is shown). +//! +//! Pure analysis, like [`crate::complete`]: the LSP backend turns the +//! returned [`SignatureHelp`] into `lsp_types::SignatureHelp`. + +use crate::ast::{CommandKind, Document, ProcSignature, Stmt}; +use crate::cmdline::{self, CmdLine}; + +#[derive(Clone, Debug)] +pub struct SignatureHelp<'a> { + pub proc_name: String, + pub signature: &'a ProcSignature, + /// Proc-level doc comments (`##` above the declaration). + pub doc_comments: &'a [String], + /// Index into `signature.args` of the parameter under the cursor, + /// if one is determinable. + pub active_parameter: Option, +} + +/// Signature help for the call the cursor at `offset` is inside, or +/// `None` if the cursor isn't in a known proc call. +pub fn signature_help_at<'a>( + document: &'a Document, + source: &str, + offset: u32, +) -> Option> { + let line = cmdline::analyze(source, offset); + // `command_name` is `None` while the cursor is still on the first + // word, which is exactly when there's no call to describe yet. 
+ let name = line.command_name()?; + let (signature, doc_comments) = find_proc(document, name)?; + Some(SignatureHelp { + proc_name: name.to_string(), + signature, + doc_comments, + active_parameter: active_parameter(signature, &line), + }) +} + +fn find_proc<'a>( + document: &'a Document, + name: &str, +) -> Option<(&'a ProcSignature, &'a [String])> { + for stmt in &document.stmts { + let Stmt::Command(cmd) = stmt else { continue }; + let CommandKind::Proc(proc) = &cmd.kind else { + continue; + }; + if proc.name.as_deref() == Some(name) { + return Some((proc.signature.as_ref()?, &cmd.doc_comments)); + } + } + None +} + +/// The active parameter is the arg named by the most recent `-flag` +/// token. Complete flags must name an arg exactly; a flag still being +/// typed (the partial word) matches by prefix so the highlight tracks +/// as the user types. +fn active_parameter(sig: &ProcSignature, line: &CmdLine<'_>) -> Option { + let mut active = None; + for word in line.words.iter().skip(1) { + if let Some(flag) = word.strip_prefix('-') { + if let Some(i) = sig.args.iter().position(|a| a.name == flag) { + active = Some(i as u32); + } + } + } + if let Some(flag) = line.partial.strip_prefix('-') { + if !flag.is_empty() { + if let Some(i) = + sig.args.iter().position(|a| a.name.starts_with(flag)) + { + return Some(i as u32); + } + } + } + active +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + fn cursor(src_with_marker: &str) -> (String, u32) { + let offset = src_with_marker.find('|').expect("no cursor marker"); + (src_with_marker.replacen('|', "", 1), offset as u32) + } + + fn help(src_with_marker: &str) -> Option<(String, Option)> { + let (src, off) = cursor(src_with_marker); + let parsed = parse(&src); + signature_help_at(&parsed.document, &src, off) + .map(|h| (h.proc_name, h.active_parameter)) + } + + #[test] + fn shows_signature_after_name() { + let src = "\ +proc cfg {\n width\n depth\n} { }\n\ +cfg |\n"; + let (name, active) = 
/// Half-open byte range `start..end` into a source string.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct Span {
    pub start: u32,
    pub end: u32,
}

impl Span {
    /// Build a span from its two byte offsets.
    pub const fn new(start: u32, end: u32) -> Self {
        Span { start, end }
    }

    /// The span as a `usize` range, ready for slicing.
    pub fn range(self) -> Range<usize> {
        Range {
            start: self.start as usize,
            end: self.end as usize,
        }
    }

    /// The text this span covers. Panics, like any string slice, if
    /// the span is out of bounds or splits a UTF-8 character.
    pub fn slice(self, source: &str) -> &str {
        let Range { start, end } = self.range();
        &source[start..end]
    }

    /// Smallest span covering both `self` and `other`.
    pub fn merge(self, other: Span) -> Span {
        Span {
            start: std::cmp::min(self.start, other.start),
            end: std::cmp::max(self.end, other.end),
        }
    }

    /// Translate this span by `delta` bytes. Used to lift spans from a
    /// sub-parse (e.g. a proc body parsed as its own fragment) back
    /// into whole-source coordinates.
    pub const fn shifted(self, delta: u32) -> Span {
        Span {
            start: self.start + delta,
            end: self.end + delta,
        }
    }

    /// True if `offset` lies within this span, counting both ends.
    /// Including the end is deliberate for "what's at the cursor"
    /// queries: a cursor placed right after a token is still on it.
    pub fn contains(self, offset: u32) -> bool {
        (self.start..=self.end).contains(&offset)
    }
}

impl From<Range<usize>> for Span {
    fn from(Range { start, end }: Range<usize>) -> Self {
        Span::new(start as u32, end as u32)
    }
}
/// The three shapes a `src` import path can take.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PathKind {
    /// Relative to the importing file's directory.
    Relative,
    /// Filesystem-absolute (starts with `/`).
    Absolute,
    /// Resolved via a workspace dependency named `name`. `subpath` is
    /// the rest of the path after `@name/` (may be empty).
    Named { name: String, subpath: String },
}

/// An import path together with the shape it was classified as.
#[derive(Clone, Debug)]
pub struct ClassifiedPath<'a> {
    pub kind: PathKind,
    /// The original path text, retained for diagnostics.
    pub raw: &'a str,
}

/// Classify an import path purely from its text; the filesystem is
/// never consulted.
///
/// A leading `@` makes the path [`PathKind::Named`], split at the
/// first `/` into dependency name and subpath (empty when there is no
/// `/`). Otherwise a leading `/` means [`PathKind::Absolute`], and
/// everything else is [`PathKind::Relative`].
pub fn classify(path: &str) -> ClassifiedPath<'_> {
    let kind = match path.strip_prefix('@') {
        Some(rest) => {
            let (name, subpath) = rest.split_once('/').unwrap_or((rest, ""));
            PathKind::Named {
                name: name.to_owned(),
                subpath: subpath.to_owned(),
            }
        }
        None if path.starts_with('/') => PathKind::Absolute,
        None => PathKind::Relative,
    };
    ClassifiedPath { kind, raw: path }
}
The caller is +/// responsible for filling this in — the htcl crate stays free of +/// `vw-lib` and filesystem-cache concerns. +#[derive(Clone, Debug, Default)] +pub struct Resolver { + cached_deps: std::collections::HashMap, +} + +impl Resolver { + pub fn new() -> Self { + Self::default() + } + + /// Register a dependency's cached root path (typically + /// `~/.vw/deps/-`). + pub fn with_dep(mut self, name: impl Into, root: PathBuf) -> Self { + self.cached_deps.insert(name.into(), root); + self + } + + /// Iterate the registered dependencies as `(name, root)` pairs. + /// Order is unspecified — callers that care should sort. + pub fn deps(&self) -> impl Iterator { + self.cached_deps + .iter() + .map(|(k, v)| (k.as_str(), v.as_path())) + } + + /// Look up a dependency's cached root by name. + pub fn dep_root(&self, name: &str) -> Option<&Path> { + self.cached_deps.get(name).map(PathBuf::as_path) + } + + /// Resolve `path` (as written in a `src` statement) against the + /// directory containing the importing file. Returns the canonical + /// path to the imported file, with `.htcl` appended if absent. + pub fn resolve( + &self, + importing_file_dir: &Path, + path: &str, + ) -> Result { + let classified = classify(path); + let candidate = match &classified.kind { + PathKind::Relative => importing_file_dir.join(path), + PathKind::Absolute => PathBuf::from(path), + PathKind::Named { name, subpath } => { + let Some(root) = self.cached_deps.get(name) else { + return Err(ResolveError::UnknownDependency { + name: name.clone(), + subpath: subpath.clone(), + }); + }; + // Bare `@` resolves to the dep's default entry + // point — `module.htcl` at the dep root, analogous to + // Rust's `src/lib.rs`. `@/` still picks a + // specific module under the dep. 
+ if subpath.is_empty() { + root.join(DEFAULT_MODULE) + } else { + root.join(subpath) + } + } + }; + + let with_ext = if candidate.extension().is_some() { + candidate.clone() + } else { + candidate.with_extension("htcl") + }; + + if !with_ext.exists() { + return Err(ResolveError::NotFound { path: with_ext }); + } + Ok(with_ext.canonicalize().unwrap_or(with_ext)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + #[test] + fn classify_relative() { + assert_eq!(classify("foo/bar").kind, PathKind::Relative); + assert_eq!(classify("bar").kind, PathKind::Relative); + } + + #[test] + fn classify_absolute() { + assert_eq!(classify("/opt/x/y").kind, PathKind::Absolute); + } + + #[test] + fn classify_named() { + assert_eq!( + classify("@quartz/ip/bacd").kind, + PathKind::Named { + name: "quartz".into(), + subpath: "ip/bacd".into() + } + ); + assert_eq!( + classify("@bare").kind, + PathKind::Named { + name: "bare".into(), + subpath: String::new() + } + ); + } + + fn fixture() -> (tempfile::TempDir, Resolver) { + let dir = tempfile::tempdir().unwrap(); + let dep_root = dir.path().join("dep"); + fs::create_dir_all(dep_root.join("ip")).unwrap(); + fs::write(dep_root.join("ip").join("bacd.htcl"), "## stub\n").unwrap(); + fs::write(dir.path().join("local.htcl"), "## local\n").unwrap(); + let resolver = Resolver::new().with_dep("quartz", dep_root); + (dir, resolver) + } + + #[test] + fn resolve_relative_appends_htcl() { + let (dir, resolver) = fixture(); + let resolved = resolver.resolve(dir.path(), "local").unwrap(); + assert_eq!( + resolved.file_name().and_then(|s| s.to_str()), + Some("local.htcl") + ); + } + + #[test] + fn resolve_named_dependency() { + let (dir, resolver) = fixture(); + let resolved = resolver.resolve(dir.path(), "@quartz/ip/bacd").unwrap(); + assert!(resolved.ends_with("dep/ip/bacd.htcl"), "{resolved:?}"); + } + + #[test] + fn bare_named_dep_resolves_to_module_htcl() { + // `src @quartz` → `/module.htcl` (analogous to + // Rust's `use 
crate` resolving to `src/lib.rs`). + let dir = tempfile::tempdir().unwrap(); + let dep_root = dir.path().join("dep"); + fs::create_dir_all(&dep_root).unwrap(); + fs::write(dep_root.join("module.htcl"), "# entry\n").unwrap(); + let resolver = Resolver::new().with_dep("quartz", dep_root.clone()); + let resolved = resolver.resolve(dir.path(), "@quartz").unwrap(); + assert!(resolved.ends_with("dep/module.htcl"), "{resolved:?}"); + } + + #[test] + fn unknown_dep_errors_cleanly() { + let (dir, resolver) = fixture(); + let err = resolver.resolve(dir.path(), "@nope/foo").unwrap_err(); + assert!( + matches!(err, ResolveError::UnknownDependency { .. }), + "{err:?}" + ); + } + + #[test] + fn missing_file_errors() { + let (dir, resolver) = fixture(); + let err = resolver.resolve(dir.path(), "does/not/exist").unwrap_err(); + assert!(matches!(err, ResolveError::NotFound { .. }), "{err:?}"); + } +} diff --git a/vw-htcl/src/type_parse.rs b/vw-htcl/src/type_parse.rs new file mode 100644 index 0000000..f61d6a0 --- /dev/null +++ b/vw-htcl/src/type_parse.rs @@ -0,0 +1,415 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Mini-parser for htcl type expressions. +//! +//! Grammar: +//! +//! ```text +//! Type ::= Ident ('::' Ident | '<' Type (',' Type)* '>')? +//! Ident ::= [A-Za-z_] [A-Za-z0-9_]* +//! ``` +//! +//! The `Ident '::' Ident` form yields a [`TypeExpr::Qualified`], +//! used for the `Enum::Variant` annotations on overloaded handler +//! procs. The two forms (qualified vs generic) are mutually +//! exclusive — `Enum::Variant<…>` is a parse error. +//! +//! Whitespace is permitted between tokens but not within identifiers. +//! That's why type expressions with whitespace (`dict`) +//! must be brace-wrapped when used as a single htcl word — `dict` parses as four htcl words at the parent level, but +//! 
`{dict}` parses as one. The caller of [`parse`] is +//! responsible for that unwrap before handing us the type text. +//! +//! Spans returned are absolute source spans: the caller passes a +//! `base_offset` corresponding to the byte position of the first +//! character of `text` in the original source. + +use crate::ast::TypeExpr; +use crate::span::Span; + +/// One parse-error from the type parser. The caller renders these as +/// regular htcl parse-error diagnostics. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TypeParseError { + pub message: String, + pub span: Span, +} + +/// Parse `text` as a type expression, with absolute source positions +/// rooted at `base_offset`. Returns the parsed expression on success, +/// or the first error encountered. +pub fn parse(text: &str, base_offset: u32) -> Result { + let mut p = Parser::new(text, base_offset); + let ty = p.parse_type()?; + p.skip_ws(); + if !p.eof() { + return Err(TypeParseError { + message: format!( + "unexpected `{}` after type expression", + p.rest().chars().next().unwrap_or('\0') + ), + span: p.here_span(), + }); + } + Ok(ty) +} + +struct Parser<'a> { + text: &'a str, + bytes: &'a [u8], + pos: usize, + base: u32, +} + +impl<'a> Parser<'a> { + fn new(text: &'a str, base: u32) -> Self { + Self { + text, + bytes: text.as_bytes(), + pos: 0, + base, + } + } + + fn eof(&self) -> bool { + self.pos >= self.bytes.len() + } + + fn rest(&self) -> &str { + &self.text[self.pos..] + } + + fn skip_ws(&mut self) { + while self.pos < self.bytes.len() + && self.bytes[self.pos].is_ascii_whitespace() + { + self.pos += 1; + } + } + + fn here(&self) -> u32 { + self.base + self.pos as u32 + } + + /// Zero-width span at the current position — used for "unexpected + /// token" diagnostics where there's no real token to underline. 
+ fn here_span(&self) -> Span { + let h = self.here(); + Span::new(h, h) + } + + fn span_from(&self, start: usize) -> Span { + Span::new(self.base + start as u32, self.base + self.pos as u32) + } + + /// Consume one bare identifier, returning its text and span. + /// Identifiers start with `[A-Za-z_]` and contain `[A-Za-z0-9_]`. + fn parse_ident(&mut self) -> Result<(String, Span), TypeParseError> { + self.skip_ws(); + let start = self.pos; + if self.eof() { + return Err(TypeParseError { + message: "expected type name, found end of input".into(), + span: self.here_span(), + }); + } + let first = self.bytes[self.pos]; + if !(first.is_ascii_alphabetic() || first == b'_') { + return Err(TypeParseError { + message: format!( + "expected type name, found `{}`", + first as char + ), + span: self.here_span(), + }); + } + self.pos += 1; + while self.pos < self.bytes.len() { + let c = self.bytes[self.pos]; + if c.is_ascii_alphanumeric() || c == b'_' { + self.pos += 1; + } else { + break; + } + } + let name = self.text[start..self.pos].to_string(); + Ok((name, self.span_from(start))) + } + + fn parse_type(&mut self) -> Result { + let start = self.pos; + self.skip_ws(); + let ident_start = self.pos; + let (name, name_span) = self.parse_ident()?; + self.skip_ws(); + // Optional `::Variant` qualified-path suffix. Mutually + // exclusive with the `<…>` generic form — `E::V` is + // rejected below. + if self.pos + 1 < self.bytes.len() + && self.bytes[self.pos] == b':' + && self.bytes[self.pos + 1] == b':' + { + self.pos += 2; // :: + let (variant, variant_span) = self.parse_ident()?; + self.skip_ws(); + // Reject `E::V<…>` — qualified names don't take generic + // args (their purpose is to name one variant of a + // declared enum, which has no type parameters in v1). 
+ if !self.eof() && self.bytes[self.pos] == b'<' { + return Err(TypeParseError { + message: format!( + "qualified type `{name}::{variant}` cannot take \ + generic arguments" + ), + span: self.here_span(), + }); + } + return Ok(TypeExpr::Qualified { + namespace: name, + variant, + namespace_span: name_span, + variant_span, + span: self.span_from(start), + }); + } + // Optional `<...>` generic argument list. + if !self.eof() && self.bytes[self.pos] == b'<' { + self.pos += 1; // < + let mut args = Vec::new(); + // Allow empty? No — `list<>` is meaningless. Require + // at least one arg. + args.push(self.parse_type()?); + self.skip_ws(); + while !self.eof() && self.bytes[self.pos] == b',' { + self.pos += 1; // , + args.push(self.parse_type()?); + self.skip_ws(); + } + if self.eof() { + return Err(TypeParseError { + message: format!( + "unterminated generic type `{name}<…>`: \ + expected `>` or `,`", + ), + span: self.span_from(ident_start), + }); + } + if self.bytes[self.pos] != b'>' { + return Err(TypeParseError { + message: format!( + "expected `>` or `,` in generic type `{name}<…>`, \ + found `{}`", + self.bytes[self.pos] as char + ), + span: self.here_span(), + }); + } + self.pos += 1; // > + return Ok(TypeExpr::Generic { + name, + name_span, + args, + span: self.span_from(start), + }); + } + Ok(TypeExpr::Named { + name, + span: name_span, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn p(s: &str) -> TypeExpr { + parse(s, 0).unwrap_or_else(|e| panic!("parse failed: {e:?}")) + } + + #[test] + fn named_simple() { + let ty = p("string"); + match ty { + TypeExpr::Named { name, span } => { + assert_eq!(name, "string"); + assert_eq!(span, Span::new(0, 6)); + } + _ => panic!("expected Named"), + } + } + + #[test] + fn generic_single_arg() { + let ty = p("list"); + let TypeExpr::Generic { + name, args, span, .. 
+ } = ty + else { + panic!("expected Generic"); + }; + assert_eq!(name, "list"); + assert_eq!(span, Span::new(0, 13)); + assert_eq!(args.len(), 1); + assert_eq!(args[0].name(), "bd_cell"); + } + + #[test] + fn generic_two_args() { + let ty = p("dict"); + let TypeExpr::Generic { name, args, .. } = ty else { + panic!(); + }; + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + assert_eq!(args[0].name(), "string"); + assert_eq!(args[1].name(), "int"); + } + + #[test] + fn nested_generic() { + let ty = p("dict>"); + let TypeExpr::Generic { args, .. } = ty else { + panic!() + }; + assert_eq!(args.len(), 2); + assert_eq!(args[0].name(), "string"); + let TypeExpr::Generic { + name, args: inner, .. + } = &args[1] + else { + panic!("expected inner generic"); + }; + assert_eq!(name, "list"); + assert_eq!(inner[0].name(), "int"); + } + + #[test] + fn deeply_nested() { + let ty = p("list>"); + let TypeExpr::Generic { name, args, .. } = ty else { + panic!() + }; + assert_eq!(name, "list"); + let TypeExpr::Generic { + name: inner_name, + args: inner_args, + .. + } = &args[0] + else { + panic!(); + }; + assert_eq!(inner_name, "dict"); + assert_eq!(inner_args.len(), 2); + assert_eq!(inner_args[0].name(), "string"); + assert_eq!(inner_args[1].name(), "bd_cell"); + } + + #[test] + fn whitespace_between_tokens_is_fine() { + let ty = p(" dict < string , int > "); + let TypeExpr::Generic { name, args, .. } = ty else { + panic!() + }; + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + } + + #[test] + fn span_uses_base_offset() { + let ty = parse("bd_cell", 100).unwrap(); + let TypeExpr::Named { span, .. 
} = ty else { + panic!() + }; + assert_eq!(span, Span::new(100, 107)); + } + + #[test] + fn err_empty_input() { + let e = parse("", 0).unwrap_err(); + assert!(e.message.contains("expected type name")); + } + + #[test] + fn err_invalid_ident_start() { + let e = parse("", 0).unwrap_err(); + assert!( + e.message.contains("cannot take generic arguments"), + "{}", + e.message + ); + } + + #[test] + fn err_qualified_missing_variant() { + let e = parse("Property::", 0).unwrap_err(); + assert!(e.message.contains("expected type name"), "{}", e.message); + } + + #[test] + fn err_single_colon_not_qualified() { + // `Property:Scalar` (one colon) — not a qualified form. The + // first ident parses, then the trailing `:Scalar` is junk. + let e = parse("Property:Scalar", 0).unwrap_err(); + assert!(e.message.contains("unexpected")); + } +} diff --git a/vw-htcl/src/validate.rs b/vw-htcl/src/validate.rs new file mode 100644 index 0000000..211bf3c --- /dev/null +++ b/vw-htcl/src/validate.rs @@ -0,0 +1,2470 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Signature-aware call-site validation. +//! +//! Builds a {proc_name → ProcSignature} table from the top-level +//! procs in a document, then walks every call site in the same +//! document and checks the keyword arguments against the declared +//! signature. Diagnostics are language-neutral; downstream (the LSP, +//! `vw check`) maps them to the appropriate display form. + +use std::collections::HashMap; + +use crate::ast::{ + Attribute, AttributeValue, Command, CommandKind, Document, EnumDecl, + OverloadInfo, OverloadVariant, Proc, ProcArg, ProcSignature, Stmt, + TypeDecl, TypeExpr, Word, WordPart, +}; +use crate::span::Span; + +/// Side-table produced alongside the signature table by +/// [`build_signature_table_with_overloads`]. 
/// Build the internal name of one overload specialization.
///
/// The variant is appended to the public name and the leaf segment is
/// prefixed with `__`: `handle` + `Scalar` gives `__handle__Scalar`.
/// For a namespaced public name only the segment after the last `::`
/// receives the prefix, so `Property::as_nested` + `Nested` gives
/// `Property::__as_nested__Nested` and the namespace part is left
/// exactly as written.
pub fn mangle_specialization(public_name: &str, variant: &str) -> String {
    // Split just after the final `::` so the qualifier keeps its
    // separator and the leaf is whatever follows it.
    let (qualifier, leaf) = match public_name.rfind("::") {
        Some(idx) => public_name.split_at(idx + 2),
        None => ("", public_name),
    };

    let mut mangled =
        String::with_capacity(public_name.len() + variant.len() + 4);
    mangled.push_str(qualifier);
    mangled.push_str("__");
    mangled.push_str(leaf);
    mangled.push_str("__");
    mangled.push_str(variant);
    mangled
}
+/// - Duplicate-definition diagnostics only fire for collisions +/// **within** `document`. A new batch that re-`src`s a wrapper +/// already loaded earlier shouldn't warn on every input. +pub fn validate_with_signatures<'doc>( + document: &'doc Document, + source: &str, + extra: &HashMap, +) -> Vec { + validate_with_extras(document, source, extra, &HashMap::new()) +} + +/// Full validation entry point: same as [`validate_with_signatures`] +/// but also takes a pool of newtype declarations from prior session +/// batches. Lets the REPL drop a `proc bd_cell::repr` in batch N +/// without re-tripping "type bd_cell missing repr" diagnostics for +/// the `type bd_cell = string` declaration in batch N-1. +pub fn validate_with_extras<'doc>( + document: &'doc Document, + source: &str, + extra_sigs: &HashMap, + extra_types: &HashMap, +) -> Vec { + validate_with_all_extras( + document, + source, + extra_sigs, + extra_types, + &HashMap::new(), + ) +} + +/// Full validation entry point. Accepts a prior-batch pool of +/// signatures, type declarations, AND enum declarations, so the +/// REPL can split an `enum E = …` decl across batches from the +/// procs that dispatch on it. +pub fn validate_with_all_extras<'doc>( + document: &'doc Document, + source: &str, + extra_sigs: &HashMap, + extra_types: &HashMap, + extra_enums: &HashMap, +) -> Vec { + let mut diags = Vec::new(); + let (mut table, _overloads) = + build_signature_table_with_overloads(document, &mut diags); + // Prior-batch signatures fill in the gaps. The doc's own entries + // win because `entry().or_insert(...)` is a no-op on present keys. 
+ for (name, sig) in extra_sigs { + table.entry(name.clone()).or_insert(*sig); + } + let mut type_table = build_type_decl_table(document, &mut diags); + for (name, td) in extra_types { + type_table.entry(name.clone()).or_insert(*td); + } + let mut enum_table = build_enum_decl_table(document, &mut diags); + for (name, ed) in extra_enums { + enum_table.entry(name.clone()).or_insert(*ed); + } + validate_type_decl_triplets(&type_table, &table, &mut diags); + validate_enum_decls(&enum_table, &type_table, &mut diags); + validate_qualified_positions(document, &mut diags); + validate_stmts(&document.stmts, source, &table, &mut diags); + diags +} + +/// Validate every command in `stmts`, descending into proc bodies so +/// that calls nested inside a proc are checked just like top-level +/// ones. The signature table is document-wide, so a call resolves to +/// its (top-level) proc at any depth. +fn validate_stmts( + stmts: &[Stmt], + source: &str, + table: &HashMap, + diags: &mut Vec, +) { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + validate_command(cmd, source, table, diags); + match &cmd.kind { + CommandKind::Proc(proc) => { + validate_stmts(&proc.body, source, table, diags); + } + CommandKind::NamespaceEval(ns) => { + // Calls inside the namespace body are validated the + // same way; the signature-table is document-wide so + // a call to `project::set_target_language` from + // anywhere resolves to the same entry. (Bare, + // sibling-relative calls inside a namespace body + // aren't auto-qualified yet — write the qualified + // name explicitly.) + validate_stmts(&ns.body, source, table, diags); + } + _ => {} + } + // Also descend into any `[ … ]` command substitutions on this + // command's words so calls written inline get validated the + // same as top-level ones. + for word in &cmd.words { + for part in &word.parts { + if let WordPart::CmdSubst { body, .. 
} = part { + validate_stmts(body, source, table, diags); + } + } + } + } +} + +/// Build a name → signature map from every proc declaration in +/// the document, including those nested inside `namespace eval` +/// blocks (which register under `::`, matching Tcl's +/// namespace semantics). Duplicate names raise a diagnostic and the +/// later declaration wins, again matching Tcl (a second `proc` +/// redefines). +pub fn build_signature_table<'doc>( + document: &'doc Document, + diags: &mut Vec, +) -> HashMap { + let (table, _overloads) = + build_signature_table_with_overloads(document, diags); + table +} + +/// Same as [`build_signature_table`] but also returns the +/// [`OverloadTable`] side-map. Callers that need to know whether a +/// given proc name resolves through enum-overload dispatch (codegen, +/// hover, signature help) consult this. +pub fn build_signature_table_with_overloads<'doc>( + document: &'doc Document, + diags: &mut Vec, +) -> (HashMap, OverloadTable) { + // First pass: collect every proc decl per qualified name, + // preserving order so a "first wins" / "last wins" choice is + // unambiguous when we have to make one. Multi-decl entries are + // candidate overload sets; single-decl entries are normal + // procs. + let mut multi: HashMap> = + HashMap::new(); + collect_signatures_multi(&document.stmts, "", &mut multi, diags); + + let mut table: HashMap = HashMap::new(); + let mut overloads: OverloadTable = HashMap::new(); + + for (qualified, decls) in multi { + match decls.len() { + 0 => { /* impossible */ } + 1 => { + let (proc, sig) = decls[0]; + check_reserved_proc_name(&qualified, proc.name_span, diags); + table.insert(qualified, sig); + } + _ => { + // Multi-decl: classify as enum-overload OR emit + // hard error for ad-hoc overloading. 
+ match classify_overload_set(&qualified, &decls, diags) { + Some(info) => { + // Each specialization registers under its + // mangled name so analyzer drill-down + the + // dispatcher's runtime switch can find it. + for v in &info.variants { + // Find the decl whose first arg is this + // variant. We computed the mangled name + // from it during classify, so the order + // matches by construction. + for (_proc, sig) in &decls { + let Some(first) = sig.args.first() else { + continue; + }; + if matches!( + &first.type_annotation, + Some(TypeExpr::Qualified { variant, .. }) + if variant == &v.variant_name + ) { + // Mangled names are compiler- + // generated — they're allowed to + // start with `__` (that's the + // whole point). Skip the + // reserved-name check here. + table.insert( + v.mangled_proc_name.clone(), + sig, + ); + } + } + } + // Public name resolves to the first overload's + // sig as a representative. Analyzer / callers + // that want the "true" public interface + // consult `overloads`. + let (proc, sig) = decls[0]; + check_reserved_proc_name( + &qualified, + proc.name_span, + diags, + ); + table.insert(qualified.clone(), sig); + overloads.insert(qualified, info); + } + None => { + // classify_overload_set already emitted the + // diagnostic; for table consistency, fall + // back to "last wins" so downstream + // validation keeps working. Check the + // reserved prefix on each. + let (proc, sig) = *decls.last().unwrap(); + check_reserved_proc_name( + &qualified, + proc.name_span, + diags, + ); + table.insert(qualified, sig); + } + } + } + } + } + + (table, overloads) +} + +/// User procs whose qualified name starts with `__` would collide +/// with the compiler's overload-specialization mangling +/// (`____`). Reject them up front. +fn check_reserved_proc_name( + qualified: &str, + name_span: Span, + diags: &mut Vec, +) { + // Look at the last segment after the final `::`. Tcl's + // namespace separator is part of the qualified name, so e.g. 
+ // `vivado_cmd::__foo` has its "leaf" name as `__foo` — the + // collision risk is on the leaf, not the prefix. + let leaf = qualified.rsplit("::").next().unwrap_or(qualified); + if leaf.starts_with("__") { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "proc name `{qualified}` is reserved: names starting with \ + `__` are used by the compiler for overload-specialization \ + mangling (e.g. `__handle_prop__Scalar`). Rename to avoid \ + collisions." + ), + span: name_span, + }); + } +} + +fn collect_signatures_multi<'doc>( + stmts: &'doc [Stmt], + prefix: &str, + multi: &mut HashMap>, + diags: &mut Vec, +) { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::Proc(proc) => { + let Some(name) = proc.name.as_deref() else { + continue; + }; + let Some(sig) = proc.signature.as_ref() else { + continue; + }; + // v1 restriction: enum-overloaded procs must be + // declared at the top level. Inside a `namespace + // eval` block, the REPL's batch-prepare layer + // doesn't re-route to the mangled-name + dispatcher + // pipeline, so an overload arm inside a namespace + // would silently lose its dispatch semantics. + // Detect this here so the user gets a clear error + // instead of a confused runtime behavior. + let is_qualified_first = sig + .args + .first() + .and_then(|a| a.type_annotation.as_ref()) + .map(|t| matches!(t, TypeExpr::Qualified { .. })) + .unwrap_or(false); + if is_qualified_first && !prefix.is_empty() { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overloaded proc `{name}` is declared inside \ + `namespace eval {prefix}` — v1 enum-overloads \ + must be declared at the top level. Move the \ + overload arms out of the namespace block." 
+ ), + span: proc.name_span, + }); + continue; + } + let qualified = qualify(prefix, name); + multi.entry(qualified).or_default().push((proc, sig)); + } + CommandKind::NamespaceEval(ns) => { + let Some(name) = ns.name.as_deref() else { + continue; + }; + // `extern` is reserved by htcl's lowering as the + // prefix for runtime-Tcl-proc disambiguation + // (`extern::foo` → `__vw_extern_foo`). A user- + // defined namespace named `extern` would silently + // collide with that rewrite at call sites; reject + // it up front. + if name == "extern" { + diags.push(Diagnostic { + severity: Severity::Error, + message: "`extern` is a reserved namespace name in \ + htcl (used for runtime-Tcl-proc \ + disambiguation); pick a different name" + .into(), + span: ns.name_span, + }); + continue; + } + let nested = qualify(prefix, name); + collect_signatures_multi(&ns.body, &nested, multi, diags); + } + _ => {} + } + } +} + +/// Classify a multi-decl proc-name set. Returns `Some(OverloadInfo)` +/// if every member's first arg is a distinct variant of the same +/// enum AND the tail args / return type agree; returns `None` and +/// emits a diagnostic if it's not a valid overload (ad-hoc +/// overloading, missing variant, tail mismatch, etc.). +fn classify_overload_set<'doc>( + public_name: &str, + decls: &[(&'doc Proc, &'doc ProcSignature)], + diags: &mut Vec, +) -> Option { + // Each decl's first arg must be `Qualified { namespace: E, variant: V }`. + // Collect (enum_name, variant_name, dispatch_arg_span) per decl. + let mut dispatch_infos: Vec<(String, String, Span, &Proc, &ProcSignature)> = + Vec::with_capacity(decls.len()); + for (proc, sig) in decls { + let Some(first) = sig.args.first() else { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "proc `{public_name}` is declared multiple times; for \ + this to be a valid enum-overload set, every \ + declaration's first argument must be annotated with \ + a qualified variant type like `E::V`. 
This one has \ + no arguments." + ), + span: proc.name_span, + }); + return None; + }; + match &first.type_annotation { + Some(TypeExpr::Qualified { + namespace, variant, .. + }) => { + dispatch_infos.push(( + namespace.clone(), + variant.clone(), + first.name_span, + proc, + sig, + )); + } + _ => { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "proc `{public_name}` is declared multiple times \ + with first-arg types that aren't all variants \ + of a common enum; ad-hoc overloading on arbitrary \ + types is not supported. Use an enum or rename \ + one of the procs." + ), + span: first.name_span, + }); + return None; + } + } + } + // All overloads must dispatch on the same enum. + let enum_name = dispatch_infos[0].0.clone(); + for (ns, _, sp, _, _) in &dispatch_infos[1..] { + if ns != &enum_name { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overload set for proc `{public_name}` mixes enums: \ + `{enum_name}` and `{ns}`. All overloads in a set \ + must dispatch on the same enum." + ), + span: *sp, + }); + return None; + } + } + // Variants must be distinct. + { + let mut seen: std::collections::HashSet<&str> = + std::collections::HashSet::new(); + for (_, v, sp, _, _) in &dispatch_infos { + if !seen.insert(v.as_str()) { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overload set for proc `{public_name}` has two \ + arms dispatching on the same variant \ + `{enum_name}::{v}`. Each variant must have at \ + most one arm." + ), + span: *sp, + }); + return None; + } + } + } + // Tail-arg agreement: v1 restricts every arm to exactly one + // arg (the dispatched variant). Multi-arg overloads are + // future work — kwargs / specialization-binding interactions + // get hairy and the property-display motivating case doesn't + // need them. 
+ let (_, _, _, first_proc, first_sig) = &dispatch_infos[0]; + if first_sig.args.len() != 1 { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overload arm `{public_name}` declares {} args; v1 \ + enum-overloads support exactly ONE arg (the dispatched \ + variant). Additional tail args are future work — model \ + the tail as a payload field on the enum variant for now.", + first_sig.args.len() + ), + span: first_sig.span, + }); + return None; + } + for (ns, v, _, _, sig) in &dispatch_infos[1..] { + if sig.args.len() != 1 { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overload arm `{public_name}` for `{ns}::{v}` declares \ + {} args; v1 enum-overloads support exactly ONE arg \ + (the dispatched variant).", + sig.args.len() + ), + span: sig.span, + }); + return None; + } + } + // Return-type agreement: every annotated return type must match. + // Mixed annotated/unannotated → error. + let first_ret = first_sig.return_type.as_ref(); + for (ns, v, _, _, sig) in &dispatch_infos[1..] { + match (first_ret, sig.return_type.as_ref()) { + (None, None) => {} + (Some(a), Some(b)) if types_match(a, b) => {} + _ => { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overload arm `{public_name}` for `{ns}::{v}` \ + declares a different return type than the other \ + arms. All arms must agree on the return type \ + (annotate every arm with the same type, or none)." + ), + span: sig.span, + }); + return None; + } + } + } + // Arg-name agreement: every arm must use the same first-arg + // name so the dispatcher can pass the payload via kwargs as + // `- `. Cheaper than per-arm dispatch + // tracking and matches user convention (everyone writes `v`). + let dispatch_arg_name = first_sig.args[0].name.clone(); + for (ns, v, _, _, sig) in &dispatch_infos[1..] 
{ + if sig.args[0].name != dispatch_arg_name { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "overload arm `{public_name}` for `{ns}::{v}` names its \ + dispatch arg `{}`; other arms name it `{dispatch_arg_name}`. \ + All arms must use the same arg name (convention: `v`).", + sig.args[0].name + ), + span: sig.args[0].name_span, + }); + return None; + } + } + // Build the OverloadInfo. Variant order matches source order + // of the overloads. + let variants = dispatch_infos + .iter() + .map(|(_, v, sp, _, _)| OverloadVariant { + variant_name: v.clone(), + mangled_proc_name: mangle_specialization(public_name, v), + dispatch_arg_span: *sp, + }) + .collect(); + Some(OverloadInfo { + public_name: public_name.to_string(), + enum_name, + dispatch_arg_name, + variants, + anchor_span: first_proc.name_span, + }) +} + +// `tails_match` / `attr_values_equal` lived here for the multi-arg +// overload tail-agreement check. v1 restricts overloads to a single +// arg (see `classify_overload_set`), so we don't compare tails. The +// helpers are kept as a record in git history; restore when adding +// multi-arg overloads. + +/// Collect every `type NAME = UNDERLYING` declaration in `document`, +/// qualified by enclosing `namespace eval` prefix (so a `type widget` +/// declared inside `namespace eval foo {}` registers as `foo::widget`, +/// matching how procs already qualify). Duplicate declarations emit +/// a warning and the later one wins — same shape as duplicate-proc +/// handling above. 
+pub fn build_type_decl_table<'doc>( + document: &'doc Document, + diags: &mut Vec, +) -> HashMap { + let mut table = HashMap::new(); + collect_type_decls(&document.stmts, "", &mut table, diags); + table +} + +fn collect_type_decls<'doc>( + stmts: &'doc [Stmt], + prefix: &str, + table: &mut HashMap, + diags: &mut Vec, +) { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::TypeDecl(td) => { + let Some(name) = td.name.as_deref() else { + continue; + }; + let qualified = qualify(prefix, name); + if table.insert(qualified.clone(), td).is_some() { + diags.push(Diagnostic { + severity: Severity::Warning, + message: format!( + "duplicate definition of type {qualified}; \ + later definition wins" + ), + span: td.name_span, + }); + } + } + CommandKind::NamespaceEval(ns) => { + let Some(name) = ns.name.as_deref() else { + continue; + }; + if name == "extern" { + continue; + } + let nested = qualify(prefix, name); + collect_type_decls(&ns.body, &nested, table, diags); + } + CommandKind::Proc(proc) => { + // Nested type decls inside proc bodies are unusual + // but not illegal — walk them so they register. + collect_type_decls(&proc.body, prefix, table, diags); + } + _ => {} + } + } +} + +/// Mirror of [`build_type_decl_table`] for `enum NAME = { ... }` +/// declarations. Duplicate enums warn and the later one wins — +/// same shape as type-decl handling. 
+pub fn build_enum_decl_table<'doc>( + document: &'doc Document, + diags: &mut Vec, +) -> HashMap { + let mut table = HashMap::new(); + collect_enum_decls(&document.stmts, "", &mut table, diags); + table +} + +fn collect_enum_decls<'doc>( + stmts: &'doc [Stmt], + prefix: &str, + table: &mut HashMap, + diags: &mut Vec, +) { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::EnumDecl(ed) => { + let Some(name) = ed.name.as_deref() else { + continue; + }; + let qualified = qualify(prefix, name); + if table.insert(qualified.clone(), ed).is_some() { + diags.push(Diagnostic { + severity: Severity::Warning, + message: format!( + "duplicate definition of enum {qualified}; \ + later definition wins" + ), + span: ed.name_span, + }); + } + } + CommandKind::NamespaceEval(ns) => { + let Some(name) = ns.name.as_deref() else { + continue; + }; + if name == "extern" { + continue; + } + let nested = qualify(prefix, name); + collect_enum_decls(&ns.body, &nested, table, diags); + } + CommandKind::Proc(proc) => { + collect_enum_decls(&proc.body, prefix, table, diags); + } + _ => {} + } + } +} + +/// Per-enum sanity checks. v1: variants must have distinct names; +/// payload types are syntactically valid (already enforced by +/// `enum_parse`); a payload that references an unknown user type +/// is a soft warning for now (could be defined cross-batch). +fn validate_enum_decls( + enum_table: &HashMap, + _type_table: &HashMap, + diags: &mut Vec, +) { + for (qualified, ed) in enum_table { + let mut seen: std::collections::HashSet<&str> = + std::collections::HashSet::new(); + for v in &ed.variants { + if !seen.insert(v.name.as_str()) { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "enum `{qualified}` declares variant `{}` more than \ + once. 
Each variant name must be unique within an \ + enum.", + v.name + ), + span: v.name_span, + }); + } + } + } +} + +/// Walk the document and reject [`TypeExpr::Qualified`] anywhere +/// other than as a proc's first-arg type annotation. Qualified +/// types (`E::V`) are only meaningful as overload-dispatch +/// indicators; they're nonsense as return types, generic args, +/// nested type positions, or any non-first-arg slot. +fn validate_qualified_positions( + document: &Document, + diags: &mut Vec, +) { + fn walk_stmts(stmts: &[Stmt], diags: &mut Vec) { + for stmt in stmts { + let Stmt::Command(cmd) = stmt else { continue }; + match &cmd.kind { + CommandKind::Proc(proc) => { + if let Some(sig) = proc.signature.as_ref() { + for (i, arg) in sig.args.iter().enumerate() { + if let Some(ty) = arg.type_annotation.as_ref() { + // The first arg may be Qualified; + // tail args may NOT. + let allow_qualified = i == 0; + reject_nested_qualified( + ty, + allow_qualified, + diags, + ); + } + } + if let Some(ret) = sig.return_type.as_ref() { + reject_nested_qualified(ret, false, diags); + } + } + walk_stmts(&proc.body, diags); + } + CommandKind::NamespaceEval(ns) => { + walk_stmts(&ns.body, diags); + } + CommandKind::TypeDecl(td) => { + if let Some(ty) = td.underlying.as_ref() { + reject_nested_qualified(ty, false, diags); + } + } + CommandKind::EnumDecl(ed) => { + for v in &ed.variants { + if let Some(ty) = v.payload.as_ref() { + reject_nested_qualified(ty, false, diags); + } + } + } + _ => {} + } + } + } + walk_stmts(&document.stmts, diags); +} + +fn reject_nested_qualified( + ty: &TypeExpr, + allow_top_qualified: bool, + diags: &mut Vec, +) { + match ty { + TypeExpr::Named { .. } => {} + TypeExpr::Generic { args, .. } => { + // Inside a generic, nested Qualified is never allowed. + for a in args { + reject_nested_qualified(a, false, diags); + } + } + TypeExpr::Qualified { + namespace, + variant, + span, + .. 
+ } => { + if !allow_top_qualified { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "qualified type `{namespace}::{variant}` is only \ + legal as the first-argument type annotation on an \ + overloaded handler proc. It can't appear as a \ + return type, generic argument, type-decl \ + underlying, or enum-variant payload." + ), + span: *span, + }); + } + } + } +} + +/// For each newtype declaration `T`, verify the user provided the +/// required `T::repr`, `T::from`, `T::to` procs with the correct +/// shapes: +/// +/// - `T::repr` takes one arg named `v` of type `T` (or untyped), +/// returns `string` (or untyped). +/// - `T::from` takes one arg named `v` of type `` (or +/// untyped), returns `T` (or untyped). +/// - `T::to` takes one arg named `v` of type `T` (or untyped), +/// returns `` (or untyped). +/// +/// Type annotations are *optional* on these procs — an untyped +/// arg or return slot is accepted as a "trust the user" form +/// (some procs ship pre-arg-types and were authored before the +/// shape check existed). The arg COUNT and NAME (`v`) are +/// always enforced; the type slots get a stricter check only +/// when the user opted in by annotating them. +fn validate_type_decl_triplets( + type_table: &HashMap, + sig_table: &HashMap, + diags: &mut Vec, +) { + use crate::ast::TypeExpr; + for (qualified_name, td) in type_table { + let underlying = td.underlying.as_ref(); + let slots: &[(&str, Option<&TypeExpr>, Option<&str>)] = &[ + // (slot, arg type expected, return type expected as + // type-name). We pass the type-name via Option<&str> + // and compare with TypeExpr::Named's name; that's + // sufficient for the v1 set (all involved types are + // either named primitives or named newtypes; no + // generics in repr/from/to signatures). 
+ ( + "repr", + // arg should be T + Some(&named_lit(qualified_name)), + Some("string"), + ), + ( + "from", + // arg should be + underlying, + // return should be T + Some(qualified_name.as_str()), + ), + ( + "to", + // arg should be T + Some(&named_lit(qualified_name)), + // return should be + underlying.and_then(|u| match u { + TypeExpr::Named { name, .. } => Some(name.as_str()), + _ => None, + }), + ), + ]; + for (slot, expected_arg, expected_ret) in slots { + let want = format!("{qualified_name}::{slot}"); + let Some(sig) = sig_table.get(&want) else { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "newtype `{qualified_name}` is missing required \ + proc `{qualified_name}::{slot}` (see \ + docs/htcl-return-types.md)." + ), + span: td.name_span, + }); + continue; + }; + // Arg count + name. + if sig.args.len() != 1 || sig.args[0].name != "v" { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "newtype proc `{qualified_name}::{slot}` must \ + take exactly one argument named `v`" + ), + span: sig.span, + }); + continue; + } + // Arg type — only checked when the user annotated it. + if let (Some(actual), Some(expected)) = + (sig.args[0].type_annotation.as_ref(), expected_arg) + { + if !types_match(actual, expected) { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "newtype proc `{qualified_name}::{slot}`: \ + arg `v` is declared `{}` but should be \ + `{}`", + render_type_inline(actual), + render_type_inline(expected) + ), + span: sig.args[0].name_span, + }); + } + } + // Return type — only checked when the user annotated it + // and we know what to compare against. + if let (Some(actual), Some(want_name)) = + (sig.return_type.as_ref(), expected_ret) + { + let actual_name = match actual { + TypeExpr::Named { name, .. } => name.as_str(), + TypeExpr::Generic { name, .. 
} => name.as_str(), + // A qualified type like `E::V` shouldn't appear + // as a newtype's return type — that's caught by + // the dedicated Qualified-position validator + // step. If it slips through, render the + // namespace name so the user sees something + // meaningful in the diagnostic. + TypeExpr::Qualified { namespace, .. } => namespace.as_str(), + }; + if actual_name != *want_name { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "newtype proc `{qualified_name}::{slot}` \ + returns `{}` but should return `{want_name}`", + render_type_inline(actual) + ), + span: sig.span, + }); + } + } + } + } +} + +/// Build a one-shot `TypeExpr::Named` literal for comparison +/// purposes. The span is meaningless here — we only ever +/// inspect the name. +fn named_lit(name: &str) -> crate::ast::TypeExpr { + crate::ast::TypeExpr::Named { + name: name.to_string(), + span: Span::new(0, 0), + } +} + +/// Structural equality on type expressions, ignoring spans. +fn types_match(a: &crate::ast::TypeExpr, b: &crate::ast::TypeExpr) -> bool { + use crate::ast::TypeExpr; + match (a, b) { + ( + TypeExpr::Named { name: an, .. }, + TypeExpr::Named { name: bn, .. }, + ) => an == bn, + ( + TypeExpr::Generic { + name: an, args: aa, .. + }, + TypeExpr::Generic { + name: bn, args: ba, .. + }, + ) => { + an == bn + && aa.len() == ba.len() + && aa.iter().zip(ba.iter()).all(|(x, y)| types_match(x, y)) + } + _ => false, + } +} + +/// Render a type expression for inclusion in a diagnostic message. +/// Mirrors `vw-analyzer/src/htcl_backend.rs::render_type` — kept +/// in sync by convention since the analyzer can't depend on +/// validate.rs. +fn render_type_inline(ty: &crate::ast::TypeExpr) -> String { + use crate::ast::TypeExpr; + match ty { + TypeExpr::Named { name, .. } => name.clone(), + TypeExpr::Generic { name, args, .. 
} => { + let inner: Vec = + args.iter().map(render_type_inline).collect(); + format!("{name}<{}>", inner.join(",")) + } + TypeExpr::Qualified { + namespace, variant, .. + } => { + format!("{namespace}::{variant}") + } + } +} + +/// Tcl core builtins that legitimately take either `-flag` +/// arguments natively (`string match -nocase`, `regexp -line`, +/// `lsort -unique`) or take positional list arguments that +/// commonly start with `-` (e.g. `lappend cmd -ruledeck $x` where +/// `-ruledeck` is being appended as a literal token, not parsed +/// by `lappend`). Calls to anything in this list pass the +/// unknown-call check unconditionally. +/// +/// Keep this small but pragmatic: a missed builtin produces a +/// pestering error on calls that work fine; an over-included name +/// hides a real "you forgot to src @x" mistake. The set below is +/// the standard Tcl core surface most htcl bodies actually use. +fn is_known_tcl_builtin(name: &str) -> bool { + matches!( + name, + // Container ops whose positional args often look like flags. + "lappend" + | "lset" + | "linsert" + | "lreplace" + | "lrange" + | "lindex" + | "list" + | "llength" + | "dict" + | "array" + | "set" + | "unset" + | "incr" + | "append" + | "concat" + // String / regex / sort builtins that accept `-flag`s natively. + | "string" + | "regexp" + | "regsub" + | "lsort" + | "lsearch" + | "switch" + | "format" + | "scan" + | "binary" + // Flow / introspection / interp. + | "after" + | "eval" + | "uplevel" + | "upvar" + | "apply" + | "info" + | "package" + | "catch" + | "try" + | "throw" + | "error" + | "return" + | "expr" + // I/O & filesystem. + | "puts" + | "gets" + | "read" + | "close" + | "open" + | "file" + | "exec" + | "fconfigure" + | "fileevent" + | "flush" + // Channels / Tk-style. + | "namespace" + | "variable" + | "global" + | "rename" + | "interp" + ) +} + +/// Join a namespace prefix with a member name using Tcl's `::` +/// separator. 
The empty prefix yields the bare name (used at the +/// document root where there's no enclosing namespace). +fn qualify(prefix: &str, name: &str) -> String { + if prefix.is_empty() { + name.to_string() + } else { + format!("{prefix}::{name}") + } +} + +fn validate_command( + cmd: &Command, + source: &str, + table: &HashMap, + diags: &mut Vec, +) { + let call_name = match &cmd.kind { + CommandKind::Generic => match cmd.words.first() { + Some(w) => match w.as_text() { + Some(t) => t, + None => return, + }, + None => return, + }, + // Don't validate inside declarations themselves — those + // aren't calls. (NamespaceEval is a declaration; its body's + // statements are validated by the recursion in + // `validate_stmts`.) + CommandKind::Proc(_) + | CommandKind::Set + | CommandKind::Src(_) + | CommandKind::NamespaceEval(_) + | CommandKind::TypeDecl(_) + | CommandKind::EnumDecl(_) => { + return; + } + }; + // `extern::name` is the user's opt-out: "this call resolves + // to a runtime Tcl proc, don't analyze its signature." Lowering + // strips the prefix and aliases the underlying proc into place. + if crate::lower::is_extern_call(call_name) { + return; + } + let Some(sig) = table.get(call_name) else { + // Unknown call. If it uses `-flag` keyword arguments, the + // user probably meant an htcl wrapper that isn't loaded — + // shipping it to the EDA backend would either error + // cryptically or do something nonsensical with the + // arguments. Force the user to be explicit: either `src` a + // wrapper module, or use `extern::` for the raw + // Tcl/EDA proc. 
+ let uses_keyword = cmd.words.iter().skip(1).any(|w| { + w.as_text() + .is_some_and(|t| t.starts_with('-') && t.len() > 1) + }); + if uses_keyword && !is_known_tcl_builtin(call_name) { + let hint = match suggest_name(call_name, table.keys()) { + Some(s) => format!(" — did you mean `{s}`?"), + None => String::new(), + }; + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "undefined proc `{call_name}`{hint}; either \ + `src` a module that defines it or use \ + `extern::{call_name}` to call the underlying \ + Tcl proc directly" + ), + span: cmd.words[0].span, + }); + } + return; + }; + + // Parse keyword args from the command's words. The first word is + // the call name; the remaining words alternate -flag/value. + let mut idx = 1usize; + let mut seen: HashMap = HashMap::new(); + while idx < cmd.words.len() { + let word = &cmd.words[idx]; + let flag_text = match word.as_text() { + Some(t) if t.starts_with('-') => &t[1..], + Some(t) => { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!("expected keyword argument, found {t}"), + span: word.span, + }); + idx += 1; + continue; + } + None => { + diags.push(Diagnostic { + severity: Severity::Error, + message: "expected keyword argument".into(), + span: word.span, + }); + idx += 1; + continue; + } + }; + let flag_name = flag_text.to_string(); + let value_word = cmd.words.get(idx + 1); + + match sig.find(&flag_name) { + None => { + let known: Vec<&str> = + sig.args.iter().map(|a| a.name.as_str()).collect(); + let hint = if known.is_empty() { + String::new() + } else { + format!(". 
Possible values are {}", known.join(", ")) + }; + diags.push(Diagnostic { + severity: Severity::Error, + message: format!("undefined argument -{flag_name}{hint}"), + span: word.span, + }); + } + Some(arg) => { + if let Some(prev) = seen.insert(flag_name.clone(), word.span) { + let _ = prev; + diags.push(Diagnostic { + severity: Severity::Warning, + message: format!("duplicate argument -{flag_name}"), + span: word.span, + }); + } + if let Some(value) = value_word { + validate_value(call_name, arg, value, source, diags); + } else { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "argument -{flag_name} is missing a value" + ), + span: word.span, + }); + } + } + } + // Step past the flag and its value. + idx += if value_word.is_some() { 2 } else { 1 }; + } + + // Build canonical `@one_of` groups. Each arg's `@one_of(...)` + // declares an alternatives set: the arg itself plus the named + // siblings. We collapse declarations from each direction (sib A + // says `@one_of(B)` and sib B says `@one_of(A)`) into one + // canonical group, then check that **exactly one** arg from each + // group is supplied at the call site. + // + // Args participating in a group are treated as optional for the + // missing-required check below — the group rule is the source of + // truth for "must supply something." + let one_of_groups = collect_one_of_groups(sig); + let in_one_of: std::collections::HashSet<&str> = one_of_groups + .iter() + .flat_map(|g| g.iter().map(String::as_str)) + .collect(); + + // Required-args check. An arg is required when it has no + // `@default` to fall back to — the user must supply a value. + // Args in an `@one_of` group are governed by the group rule + // instead, so skip them here. 
+ for arg in &sig.args { + if seen.contains_key(&arg.name) { + continue; + } + if in_one_of.contains(arg.name.as_str()) { + continue; + } + let is_required = arg.attribute("default").is_none(); + if is_required { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "missing required argument -{name}", + name = arg.name, + ), + span: cmd.span, + }); + } + } + + // `@one_of` groups: exactly one alternative must be present. + for group in &one_of_groups { + let present: Vec<&str> = group + .iter() + .filter(|n| seen.contains_key(n.as_str())) + .map(String::as_str) + .collect(); + if present.len() == 1 { + continue; + } + let opts: Vec = group.iter().map(|n| format!("-{n}")).collect(); + let message = if present.is_empty() { + format!( + "missing required argument — exactly one of {} must be \ + supplied", + opts.join(", ") + ) + } else { + let got: Vec = + present.iter().map(|n| format!("-{n}")).collect(); + format!( + "exactly one of {} may be supplied, got {}", + opts.join(", "), + got.join(", ") + ) + }; + diags.push(Diagnostic { + severity: Severity::Error, + message, + span: cmd.span, + }); + } + + // Inter-arg deps for present args. + for (flag_name, flag_span) in &seen { + let Some(arg) = sig.find(flag_name) else { + continue; + }; + if let Some(req) = arg.attribute("requires") { + for value in &req.values { + let referenced = match value { + AttributeValue::Ident { value, .. } + | AttributeValue::String { value, .. } => value.as_str(), + AttributeValue::Integer { .. } => continue, + }; + if !seen.contains_key(referenced) { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "argument -{flag_name} requires -{referenced} \ + to also be set" + ), + span: *flag_span, + }); + } + } + } + if let Some(conflicts) = arg.attribute("conflicts") { + for value in &conflicts.values { + let referenced = match value { + AttributeValue::Ident { value, .. } + | AttributeValue::String { value, .. 
} => value.as_str(), + AttributeValue::Integer { .. } => continue, + }; + if seen.contains_key(referenced) { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "argument -{flag_name} conflicts with \ + -{referenced}" + ), + span: *flag_span, + }); + } + } + } + if arg.attribute("deprecated").is_some() { + let msg = arg + .attribute("deprecated") + .and_then(|a| a.values.first()) + .map(|v| v.as_str().to_string()) + .unwrap_or_default(); + let m = if msg.is_empty() { + format!("argument -{flag_name} is deprecated") + } else { + format!("argument -{flag_name} is deprecated: {msg}") + }; + diags.push(Diagnostic { + severity: Severity::Warning, + message: m, + span: *flag_span, + }); + } + } +} + +/// Collect canonical `@one_of` alternatives groups for a signature. +/// +/// Each arg's `@one_of(sib1, sib2, ...)` declares that exactly one +/// of `{arg, sib1, sib2, ...}` must be supplied at the call site. +/// We treat the declaration as symmetric (both `dict @one_of(name)` +/// and `name @one_of(dict)` describe the same group), so a `BTreeSet` +/// of the participating names canonicalizes each group regardless of +/// which direction (or which redundant copies) the author wrote. +fn collect_one_of_groups( + sig: &ProcSignature, +) -> Vec> { + use std::collections::BTreeSet; + let mut seen: std::collections::HashSet> = + std::collections::HashSet::new(); + let mut out: Vec> = Vec::new(); + for arg in &sig.args { + let Some(attr) = arg.attribute("one_of") else { + continue; + }; + let mut group: BTreeSet = BTreeSet::new(); + group.insert(arg.name.clone()); + for value in &attr.values { + match value { + AttributeValue::Ident { value, .. } + | AttributeValue::String { value, .. } => { + group.insert(value.clone()); + } + AttributeValue::Integer { .. 
} => continue, + } + } + if group.len() >= 2 && seen.insert(group.clone()) { + out.push(group); + } + } + out +} + +fn validate_value( + call_name: &str, + arg: &ProcArg, + value_word: &Word, + _source: &str, + diags: &mut Vec, +) { + // For Phase 2 we only validate literal-text values. Word forms + // that include `$var` or `[cmd]` are runtime-dynamic; we let them + // through silently. Future work can teach the validator about + // values produced by known builtins. + let Some(literal) = literal_value(value_word) else { + return; + }; + + if let Some(enum_attr) = arg.attribute("enum") { + check_enum( + call_name, &arg.name, enum_attr, &literal, value_word, diags, + ); + } + if let Some(range_attr) = arg.attribute("range") { + check_range( + call_name, &arg.name, range_attr, &literal, value_word, diags, + ); + } +} + +fn literal_value(word: &Word) -> Option { + let mut out = String::new(); + for part in &word.parts { + match part { + WordPart::Text { value, .. } => out.push_str(value), + WordPart::Escape { value, .. } => out.push(*value), + WordPart::VarRef { .. } | WordPart::CmdSubst { .. } => { + // Dynamic content — not a literal. + return None; + } + } + } + Some(out) +} + +fn check_enum( + _call_name: &str, + arg_name: &str, + enum_attr: &Attribute, + literal: &str, + value_word: &Word, + diags: &mut Vec, +) { + let allowed: Vec = enum_attr + .values + .iter() + .map(|v| match v { + AttributeValue::Integer { value, .. } => value.to_string(), + AttributeValue::Ident { value, .. } + | AttributeValue::String { value, .. } => value.clone(), + }) + .collect(); + if !allowed.iter().any(|a| a == literal) { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "value {literal} for -{arg_name} is not in @enum. 
Possible \ + values are {}", + allowed.join(", ") + ), + span: value_word.span, + }); + } +} + +fn check_range( + _call_name: &str, + arg_name: &str, + range_attr: &Attribute, + literal: &str, + value_word: &Word, + diags: &mut Vec, +) { + let (Some(min), Some(max)) = + (range_attr.values.first(), range_attr.values.get(1)) + else { + diags.push(Diagnostic { + severity: Severity::Warning, + message: format!( + "@range on -{arg_name} should have two numeric bounds" + ), + span: range_attr.span, + }); + return; + }; + let ( + AttributeValue::Integer { value: min, .. }, + AttributeValue::Integer { value: max, .. }, + ) = (min, max) + else { + diags.push(Diagnostic { + severity: Severity::Warning, + message: format!("@range on -{arg_name} has non-integer bounds"), + span: range_attr.span, + }); + return; + }; + let Ok(n) = literal.parse::() else { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "argument -{arg_name} expects an integer, found {literal}" + ), + span: value_word.span, + }); + return; + }; + if n < *min || n > *max { + diags.push(Diagnostic { + severity: Severity::Error, + message: format!( + "value {n} for -{arg_name} is out of @range({min}, {max})" + ), + span: value_word.span, + }); + } +} + +/// Standard compiler-style "did you mean X?" suggestion: pick the +/// in-scope name with the smallest edit distance from `target`, +/// within a length-scaled threshold. Returns `None` when no +/// candidate is close enough (so unknown calls that aren't +/// near-misses don't get nonsense suggestions tacked on). +fn suggest_name<'a, I>(target: &str, candidates: I) -> Option +where + I: IntoIterator, +{ + // rustc-style threshold: scales with name length so single-char + // typos count for short names, but a 12-char identifier + // tolerates a few keystroke errors. Floor at 1, ceiling at 3 — + // anything past 3 starts producing surprising suggestions. 
+ let threshold = (target.chars().count() / 3).clamp(1, 3); + let mut best: Option<(usize, &str)> = None; + for cand in candidates { + let d = levenshtein(target, cand); + if d == 0 || d > threshold { + continue; + } + if best.map(|(b, _)| d < b).unwrap_or(true) { + best = Some((d, cand.as_str())); + } + } + best.map(|(_, s)| s.to_string()) +} + +/// Standard Levenshtein edit distance — number of single-character +/// insertions, deletions, or substitutions to turn `a` into `b`. +/// Two-row rolling table; O(n*m) time, O(n) space. +fn levenshtein(a: &str, b: &str) -> usize { + let a: Vec = a.chars().collect(); + let b: Vec = b.chars().collect(); + let m = a.len(); + let n = b.len(); + if m == 0 { + return n; + } + if n == 0 { + return m; + } + let mut prev: Vec = (0..=n).collect(); + let mut cur = vec![0usize; n + 1]; + for i in 1..=m { + cur[0] = i; + for j in 1..=n { + let sub = if a[i - 1] == b[j - 1] { 0 } else { 1 }; + cur[j] = (prev[j] + 1).min(cur[j - 1] + 1).min(prev[j - 1] + sub); + } + std::mem::swap(&mut prev, &mut cur); + } + prev[n] +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + fn diags(src: &str) -> Vec { + let parsed = parse(src); + // Parse errors shouldn't be present in these tests; assert + // so that test failures point at the right layer. 
+ assert!( + parsed.errors.is_empty(), + "unexpected parse errors: {:?}", + parsed.errors + ); + validate(&parsed.document, src) + } + + fn proc_decl(body: &str, call: &str) -> String { + format!("proc axis_interface {{\n{body}\n}} {{ # body\n}}\n{call}\n") + } + + #[test] + fn happy_path_no_diagnostics() { + let src = proc_decl( + " @default(0) has_tkeep\n @default(8) tdata_num_bytes", + "axis_interface -has_tkeep 1 -tdata_num_bytes 16", + ); + assert!(diags(&src).is_empty()); + } + + #[test] + fn unknown_arg() { + let src = + proc_decl(" @default(0) has_tkeep", "axis_interface -has_typo 1"); + let d = diags(&src); + assert_eq!(d.len(), 1); + assert!( + d[0].message.contains("undefined argument -has_typo"), + "{:?}", + d + ); + assert!(d[0].message.contains("Possible values are has_tkeep")); + } + + #[test] + fn missing_required() { + let src = proc_decl(" @required width", "axis_interface"); + let d = diags(&src); + assert!(d.iter().any(|d| d.message.contains("missing required"))); + } + + #[test] + fn enum_rejects_unlisted_value() { + let src = proc_decl( + " @enum(1, 2, 4, 8) tdata_num_bytes", + "axis_interface -tdata_num_bytes 3", + ); + let d = diags(&src); + assert!(d.iter().any(|d| d.message.contains("@enum"))); + } + + #[test] + fn enum_accepts_listed_value() { + let src = proc_decl( + " @enum(1, 2, 4, 8) tdata_num_bytes", + "axis_interface -tdata_num_bytes 4", + ); + assert!(diags(&src).is_empty()); + } + + #[test] + fn range_check() { + let src = + proc_decl(" @range(1, 16) width", "axis_interface -width 32"); + let d = diags(&src); + assert!(d.iter().any(|d| d.message.contains("out of @range"))); + } + + #[test] + fn requires_dependency() { + let src = proc_decl( + " @default(0) has_tuser\n @requires(has_tuser) tuser_width", + "axis_interface -tuser_width 8", + ); + let d = diags(&src); + assert!(d.iter().any(|d| d.message.contains("requires")), "{:?}", d); + } + + #[test] + fn conflicts_dependency() { + let src = proc_decl( + " has_a\n 
@conflicts(has_a) has_b", + "axis_interface -has_a 1 -has_b 1", + ); + let d = diags(&src); + assert!(d.iter().any(|d| d.message.contains("conflicts"))); + } + + #[test] + fn one_of_requires_exactly_one_alternative() { + // Two args in an @one_of group — neither supplied → error. + let src = proc_decl( + " @default(\"\") @one_of(b) a\n @default(\"\") @one_of(a) b", + "axis_interface", + ); + let d = diags(&src); + assert!( + d.iter().any(|m| m.message.contains("exactly one of -a, -b") + && m.message.contains("must be supplied")), + "{:?}", + d + ); + } + + #[test] + fn one_of_satisfied_by_either_alternative() { + let src = proc_decl( + " @default(\"\") @one_of(b) a\n @default(\"\") @one_of(a) b", + "axis_interface -a 1", + ); + assert!(diags(&src).is_empty()); + } + + #[test] + fn one_of_rejects_both_alternatives() { + // Both supplied — should be reported once (group rule). + let src = proc_decl( + " @default(\"\") @one_of(b) a\n @default(\"\") @one_of(a) b", + "axis_interface -a 1 -b 2", + ); + let d = diags(&src); + assert!( + d.iter().any(|m| m.message.contains("got -a, -b")), + "{:?}", + d + ); + } + + #[test] + fn one_of_arg_is_not_treated_as_required() { + // An @one_of arg without @default should NOT trigger the + // separate "missing required" error — the group rule + // supersedes individual required-ness. + let src = + proc_decl(" @one_of(b) a\n @one_of(a) b", "axis_interface -a 1"); + let d = diags(&src); + assert!( + d.iter() + .all(|m| !m.message.contains("missing required argument -a") + && !m.message.contains("missing required argument -b")), + "{:?}", + d + ); + } + + #[test] + fn one_of_declarations_are_symmetric() { + // Declaring `@one_of(b)` on `a` alone is enough; we don't need + // the reverse on `b`. 
+ let src = proc_decl( + " @default(\"\") @one_of(b) a\n @default(\"\") b", + "axis_interface", + ); + let d = diags(&src); + let group_errors: Vec<_> = d + .iter() + .filter(|m| { + m.message.contains("exactly one of") + && m.message.contains("must be supplied") + }) + .collect(); + assert_eq!(group_errors.len(), 1, "{:?}", d); + } + + #[test] + fn namespace_eval_proc_validates_at_qualified_name() { + // A proc declared inside `namespace eval project { ... }` + // should be reachable from the validator at its qualified + // name (`project::set_target_language`), so `@enum` + // constraints on its args still catch bad values at call + // sites — exactly like a top-level proc declaration would. + let src = "\ +namespace eval project { + proc set_target_language { + proj + @enum(VHDL, Verilog) language + } { } +} +project::set_target_language -proj p -language Klingon +"; + let d = diags(src); + assert!( + d.iter().any(|m| m.message.contains("Klingon") + && m.message.contains("@enum")), + "{:?}", + d + ); + } + + #[test] + fn namespaced_proc_satisfied_by_valid_args() { + let src = "\ +namespace eval project { + proc set_target_language { + proj + @enum(VHDL, Verilog) language + } { } +} +project::set_target_language -proj p -language VHDL +"; + assert!(diags(src).is_empty()); + } + + #[test] + fn nested_namespace_eval_qualifies_recursively() { + let src = "\ +namespace eval outer { + namespace eval inner { + proc foo { @enum(a, b) x } { } + } +} +outer::inner::foo -x bogus +"; + let d = diags(src); + assert!(d.iter().any(|m| m.message.contains("bogus")), "{:?}", d); + } + + #[test] + fn unknown_call_gets_did_you_mean_suggestion() { + // The exact shape that caught the user's typo in metroid: + // a single-char edit-distance miss against a known proc + // should produce a `did you mean ...` suggestion. 
+ let src = "\ +namespace eval port { + proc plumb_if_pin { + name + pin + } { } +} +port::plum_if_pin -name p -pin q +"; + let d = diags(src); + let err = d.iter().find(|m| m.severity == Severity::Error).unwrap(); + assert!( + err.message.contains("did you mean `port::plumb_if_pin`"), + "{}", + err.message + ); + } + + #[test] + fn unrelated_unknown_call_has_no_suggestion() { + // A name with no near-miss should NOT get a fake suggestion + // tacked on — that's just misleading noise. + let src = "totally_made_up_thing -arg 1\n"; + let d = diags(src); + let err = d.iter().find(|m| m.severity == Severity::Error).unwrap(); + assert!(!err.message.contains("did you mean"), "{}", err.message); + } + + #[test] + fn unknown_keyword_call_is_an_error() { + // No proc declaration in scope, no `extern::` prefix — the + // call uses `-flag` shape so the validator demands the user + // be explicit about the dependency. + let src = "create_project -in_memory 1 -name foo\n"; + let d = diags(src); + assert!( + d.iter().any(|m| m.severity == Severity::Error + && m.message.contains("create_project") + && m.message.contains("extern::")), + "{:?}", + d + ); + } + + #[test] + fn extern_prefixed_call_skips_unknown_check() { + // `extern::` is the user's opt-out: they're calling a raw + // Tcl proc deliberately. No diagnostic even though the + // name isn't in the signature table. + let src = "extern::create_project -name foo\n"; + assert!(diags(src).is_empty()); + } + + #[test] + fn positional_unknown_call_is_allowed() { + // No `-flag` args → looks like a positional Tcl builtin + // call (puts, set, etc.). Pass through silently. + let src = "puts hello\n"; + assert!(diags(src).is_empty()); + } + + #[test] + fn known_tcl_builtin_with_keyword_args_is_allowed() { + // `string match -nocase ...` is a legitimate Tcl-core + // pattern; the allowlist keeps it from triggering the + // unknown-call error. 
+ let src = "string match -nocase pat str\n"; + assert!(diags(src).is_empty()); + } + + #[test] + fn namespace_eval_extern_is_rejected() { + let src = "namespace eval extern { proc foo {} { } }\n"; + let d = diags(src); + assert!( + d.iter() + .any(|m| m.message.contains("reserved namespace name")), + "{:?}", + d + ); + } + + #[test] + fn duplicate_arg_warns() { + let src = proc_decl(" has_a", "axis_interface -has_a 1 -has_a 2"); + let d = diags(&src); + assert!( + d.iter().any(|d| d.message.contains("duplicate argument")), + "{:?}", + d + ); + } + + #[test] + fn dynamic_value_skips_enum_check() { + let src = + proc_decl(" @enum(1, 2, 4) width", "axis_interface -width $x"); + // $x is runtime; we don't statically know it's outside the + // enum, so no enum diagnostic. + let d = diags(&src); + assert!(d.iter().all(|d| !d.message.contains("@enum"))); + } + + #[test] + fn validates_call_inside_proc_body() { + // A bad flag on a call nested in another proc's body should be + // diagnosed, same as at the top level. + let src = "\ +proc if_tport {\n type\n name\n} { }\n\ +proc axis_if {\n kind\n} {\n if_tport -type t -namze m\n}\n"; + let d = diags(src); + assert!( + d.iter() + .any(|d| d.message.contains("undefined argument -namze")), + "{:?}", + d + ); + } + + #[test] + fn validates_call_inside_command_substitution() { + // The user case: `set cell [create_cpm5 -foo bar]`. The + // validator must descend into `[…]` so the bad flag is caught + // the same way it is at the top level. + let src = "\ +proc create_cpm5 {\n @default(0) name\n} { }\n\ +set cell [create_cpm5 -foo bar]\n"; + let d = diags(src); + assert!( + d.iter() + .any(|d| d.message.contains("undefined argument -foo")), + "{:?}", + d + ); + } + + #[test] + fn arg_with_no_default_is_implicitly_required() { + // `name` has neither `@default` nor `@required` — calling the + // proc without a value for it should still error. 
+ let src = "\ +proc create_cpm5 {\n name\n} { }\n\ +create_cpm5\n"; + let d = diags(src); + assert!( + d.iter() + .any(|d| d.message.contains("missing required argument -name")), + "{:?}", + d + ); + } + + #[test] + fn implicit_required_satisfied_when_supplied() { + let src = "\ +proc create_cpm5 {\n name\n} { }\n\ +create_cpm5 -name x\n"; + assert!(diags(src).is_empty()); + } + + #[test] + fn extra_signatures_resolve_unknown_calls_from_prior_batches() { + // The REPL session case: a wrapper declared in a prior + // batch is in `extra`; the new batch's bare call to it must + // resolve (no `extern::` error) and its keyword args must + // validate against the prior signature. + let prior_src = "\ +namespace eval vivado { + proc create_project { + @default(\"\") name + @enum(0, 1) @default(0) in_memory + } { } +} +"; + let prior_parsed = parse(prior_src); + assert!(prior_parsed.errors.is_empty()); + let mut sink = Vec::new(); + let prior_table = + build_signature_table(&prior_parsed.document, &mut sink); + + // New batch: bare `vivado::create_project -name foo`. No + // declaration in scope here — only the prior batch's table + // saves it from the unknown-keyword-call error. + let new_src = "vivado::create_project -name foo\n"; + let new_parsed = parse(new_src); + let diags = validate_with_signatures( + &new_parsed.document, + new_src, + &prior_table, + ); + assert!( + diags.iter().all(|d| d.severity != Severity::Error), + "{:?}", + diags + ); + + // And the keyword-args still get validated — a bad enum + // value should still error even though the sig came in + // through `extra`. 
+ let bad_src = "vivado::create_project -in_memory bogus\n"; + let bad_parsed = parse(bad_src); + let bad_diags = validate_with_signatures( + &bad_parsed.document, + bad_src, + &prior_table, + ); + assert!( + bad_diags + .iter() + .any(|d| d.message.contains("bogus") + && d.message.contains("@enum")), + "{:?}", + bad_diags + ); + } + + #[test] + fn doc_signatures_shadow_extra_without_warning() { + // Re-declaring a proc in the new batch should NOT raise the + // "duplicate definition" warning against the prior-batch + // signature — that's a normal `src @lib` reload case in the + // REPL and would be noisy. The new declaration takes + // precedence. + let prior_src = "proc foo { @default(0) x } { }\n"; + let prior_parsed = parse(prior_src); + let mut sink = Vec::new(); + let prior_table = + build_signature_table(&prior_parsed.document, &mut sink); + + let new_src = "proc foo { @default(1) y } { }\nfoo -y 2\n"; + let new_parsed = parse(new_src); + let diags = validate_with_signatures( + &new_parsed.document, + new_src, + &prior_table, + ); + assert!( + diags.iter().all(|d| !d.message.contains("duplicate")), + "{:?}", + diags + ); + // And the new sig is the one that resolved: `-y` is + // accepted, `-x` would have been the prior sig's arg. + assert!( + diags.iter().all(|d| d.severity != Severity::Error), + "{:?}", + diags + ); + } + + #[test] + fn unknown_positional_call_is_not_validated() { + // Bare positional call to an unknown name (could be a Tcl + // builtin) is silently accepted. Unknown calls with + // `-flag` args are the *only* unknown-call case that + // errors — see `unknown_keyword_call_is_an_error`. + let src = "axis_interface tkeep_yes 1\n"; + assert!(diags(src).is_empty()); + } + + // --- type-decl triplet enforcement (step 1b) ---------------- + + /// Build a valid type+triplet block — bd_cell with all three + /// procs present so the validator should accept it. 
+ fn full_triplet_src() -> &'static str { + "type bd_cell = string\n\ + proc bd_cell::repr {v} { return $v }\n\ + proc bd_cell::from {v} { return $v }\n\ + proc bd_cell::to {v} { return $v }\n" + } + + #[test] + fn type_decl_with_full_triplet_passes() { + let src = full_triplet_src(); + let d = diags(src); + assert!( + d.iter().all(|d| !d.message.contains("missing required")), + "unexpected diagnostics: {:?}", + d + ); + } + + #[test] + fn type_decl_missing_repr_emits_diagnostic() { + let src = "type bd_cell = string\n\ + proc bd_cell::from {v} { return $v }\n\ + proc bd_cell::to {v} { return $v }\n"; + let d = diags(src); + let hit = d + .iter() + .find(|d| d.message.contains("missing required")) + .expect("expected diagnostic"); + assert!(hit.message.contains("bd_cell::repr"), "{:?}", hit); + assert_eq!(hit.severity, Severity::Error); + } + + #[test] + fn type_decl_missing_all_three_lists_each() { + let src = "type widget = string\n"; + let d = diags(src); + // Now each missing slot emits its own diagnostic, so we + // assert each one shows up separately. + let missing: Vec<&str> = d + .iter() + .filter(|d| d.message.contains("missing required proc")) + .map(|d| d.message.as_str()) + .collect(); + assert!(missing.iter().any(|m| m.contains("widget::repr"))); + assert!(missing.iter().any(|m| m.contains("widget::from"))); + assert!(missing.iter().any(|m| m.contains("widget::to"))); + } + + #[test] + fn type_decl_wrong_arg_type_emits_diagnostic() { + // Annotate the v arg with the wrong type and expect a + // shape-mismatch diagnostic. 
+ let src = "type widget = string\n\ + proc widget::repr {v: int} string { return $v }\n\ + proc widget::from {v: string} widget { return $v }\n\ + proc widget::to {v: widget} string { return $v }\n"; + let d = diags(src); + let hit = d + .iter() + .find(|d| d.message.contains("widget::repr")) + .expect("expected mismatch diagnostic"); + assert!( + hit.message.contains("`int`") || hit.message.contains("int"), + "{:?}", + hit + ); + assert!(hit.message.contains("widget"), "{:?}", hit); + } + + #[test] + fn type_decl_wrong_return_type_emits_diagnostic() { + let src = "type widget = string\n\ + proc widget::repr {v: widget} int { return 0 }\n\ + proc widget::from {v: string} widget { return $v }\n\ + proc widget::to {v: widget} string { return $v }\n"; + let d = diags(src); + let hit = d + .iter() + .find(|d| d.message.contains("returns")) + .expect("expected return-type mismatch diagnostic"); + assert!(hit.message.contains("widget::repr"), "{:?}", hit); + assert!(hit.message.contains("string"), "{:?}", hit); + } + + #[test] + fn type_decl_unannotated_triplet_still_passes() { + // Existence-only check stays a fallback when the user + // hasn't annotated the procs yet. + let src = "type widget = string\n\ + proc widget::repr {v} { return $v }\n\ + proc widget::from {v} { return $v }\n\ + proc widget::to {v} { return $v }\n"; + let d = diags(src); + assert!( + d.iter().all(|d| d.severity != Severity::Error), + "got: {:?}", + d + ); + } + + #[test] + fn type_decl_in_namespace_qualifies() { + let src = "namespace eval x {\n\ + type widget = string\n\ + }\n"; + let d = diags(src); + let hit = d + .iter() + .find(|d| d.message.contains("missing required")) + .expect("expected diagnostic"); + // The namespace-qualified name should appear in the message. + assert!(hit.message.contains("x::widget"), "{:?}", hit); + assert!(hit.message.contains("x::widget::repr")); + } + + #[test] + fn prior_batch_procs_satisfy_current_batch_type_decl() { + // Batch 1: just declares the procs. 
+ let prior_src = "proc bd_cell::repr {v} { return $v }\n\ + proc bd_cell::from {v} { return $v }\n\ + proc bd_cell::to {v} { return $v }\n"; + let prior = parse(prior_src); + let mut prior_diags = Vec::new(); + let prior_sigs = + build_signature_table(&prior.document, &mut prior_diags); + // Batch 2: declares the type — should NOT complain because + // the procs live in the prior batch's signature table. + let new_src = "type bd_cell = string\n"; + let new_parsed = parse(new_src); + let diags = validate_with_signatures( + &new_parsed.document, + new_src, + &prior_sigs, + ); + assert!( + diags + .iter() + .all(|d| !d.message.contains("missing required")), + "got: {:?}", + diags + ); + } + + #[test] + fn prior_batch_type_decl_does_not_re_trigger_in_current_batch() { + // Batch 1: declares the type, no procs yet (would error + // in isolation). + let prior_src = "type bd_cell = string\n"; + let prior = parse(prior_src); + let mut prior_diags = Vec::new(); + let prior_types = + build_type_decl_table(&prior.document, &mut prior_diags); + // Batch 2: adds the procs. The type is in `extra_types`, + // and the procs are in batch 2's signature table. Putting + // them together via validate_with_extras should pass. 
+ let new_src = "proc bd_cell::repr {v} { return $v }\n\ + proc bd_cell::from {v} { return $v }\n\ + proc bd_cell::to {v} { return $v }\n"; + let new_parsed = parse(new_src); + let empty_sigs: HashMap = HashMap::new(); + let d = validate_with_extras( + &new_parsed.document, + new_src, + &empty_sigs, + &prior_types, + ); + assert!( + d.iter().all(|d| !d.message.contains("missing required")), + "got: {:?}", + d + ); + } + + // --- enum + overload classifier (step 3) ---------------------- + + #[test] + fn enum_decl_with_unique_variants_passes() { + let src = "enum Direction = {\n North\n South\n East\n West\n}\n"; + let d = diags(src); + assert!(d.is_empty(), "got: {:?}", d); + } + + #[test] + fn enum_decl_with_duplicate_variants_errors() { + let src = "enum Bad = {\n A: int\n B: string\n A: bool\n}\n"; + let d = diags(src); + let hit = d + .iter() + .find(|d| { + d.severity == Severity::Error + && d.message.contains("variant `A`") + }) + .expect("expected duplicate-variant diagnostic"); + assert!(hit.message.contains("more than once"), "{:?}", hit); + } + + #[test] + fn overload_set_with_exhaustive_arms_classifies() { + let src = "\ +enum Property = {\n Scalar: string\n Nested: int\n}\n\ +proc handle {v: Property::Scalar} { return $v }\n\ +proc handle {v: Property::Nested} { return $v }\n"; + let parsed = parse(src); + assert!(parsed.errors.is_empty(), "{:?}", parsed.errors); + let mut diags = Vec::new(); + let (sig_table, overloads) = + build_signature_table_with_overloads(&parsed.document, &mut diags); + assert!( + diags.iter().all(|d| d.severity != Severity::Error), + "got: {:?}", + diags + ); + let info = overloads.get("handle").expect("overload info for `handle`"); + assert_eq!(info.enum_name, "Property"); + assert_eq!(info.variants.len(), 2); + let names: Vec<&str> = info + .variants + .iter() + .map(|v| v.variant_name.as_str()) + .collect(); + assert!(names.contains(&"Scalar")); + assert!(names.contains(&"Nested")); + // Public-name entry exists in the sig 
table. + assert!(sig_table.contains_key("handle")); + // Specializations also register under mangled names so + // analyzer drill-down works. + assert!(sig_table.contains_key("__handle__Scalar")); + assert!(sig_table.contains_key("__handle__Nested")); + } + + #[test] + fn ad_hoc_overload_emits_hard_error() { + let src = "\ +proc foo {v: int} { return $v }\n\ +proc foo {v: string} { return $v }\n"; + let d = diags(src); + let hit = d + .iter() + .find(|d| { + d.severity == Severity::Error + && d.message.contains("ad-hoc overloading") + }) + .expect("expected ad-hoc-overloading diagnostic"); + assert!(hit.message.contains("foo"), "{:?}", hit); + } + + #[test] + fn overload_with_mismatched_enums_errors() { + let src = "\ +enum A = {\n X\n Y\n}\n\ +enum B = {\n P\n Q\n}\n\ +proc foo {v: A::X} { }\n\ +proc foo {v: B::P} { }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("mixes enums")), + "got: {:?}", + d + ); + } + + #[test] + fn overload_with_duplicate_variant_errors() { + let src = "\ +enum E = {\n A: int\n B: int\n}\n\ +proc foo {v: E::A} { }\n\ +proc foo {v: E::A} { }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("two") + && d.message.contains("E::A")), + "got: {:?}", + d + ); + } + + #[test] + fn overload_with_extra_args_errors() { + // v1 restricts overloaded procs to exactly one arg (the + // dispatched variant). Extra tail args trip the arity + // check. 
+ let src = "\ +enum E = {\n A: int\n B: int\n}\n\ +proc foo {\n v: E::A\n x\n} { }\n\ +proc foo {\n v: E::B\n y\n} { }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("exactly ONE arg")), + "got: {:?}", + d + ); + } + + #[test] + fn overload_with_mismatched_return_type_errors() { + let src = "\ +enum E = {\n A: int\n B: int\n}\n\ +proc foo {v: E::A} int { return 0 }\n\ +proc foo {v: E::B} string { return \"\" }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("return type")), + "got: {:?}", + d + ); + } + + #[test] + fn reserved_prefix_user_proc_errors() { + let src = "proc __foo {v} { return $v }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("reserved") + && d.message.contains("__")), + "got: {:?}", + d + ); + } + + #[test] + fn qualified_type_in_return_position_errors() { + let src = "\ +enum E = {\n A\n}\n\ +proc bad {} E::A { }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("qualified") + && d.message.contains("only legal")), + "got: {:?}", + d + ); + } + + #[test] + fn qualified_type_in_tail_arg_errors() { + let src = "\ +enum E = {\n A\n B\n}\n\ +proc bad {\n v: E::A\n x: E::B\n} { }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("qualified")), + "got: {:?}", + d + ); + } + + #[test] + fn qualified_type_inside_generic_errors() { + let src = "\ +enum E = {\n A\n}\n\ +proc bad {x: list} { }\n"; + let d = diags(src); + assert!( + d.iter().any(|d| d.severity == Severity::Error + && d.message.contains("qualified")), + "got: {:?}", + d + ); + } + + #[test] + fn recursive_enum_passes() { + // Inner generic with whitespace needs brace-wrapping at + // the word level — that's the existing type-decl rule. 
+ let src = "\ +enum Property = {\n Scalar: string\n Nested: Properties\n}\n\ +type Properties = {dict}\n\ +proc Properties::repr {v} { return $v }\n\ +proc Properties::from {v} { return $v }\n\ +proc Properties::to {v} { return $v }\n"; + let d = diags(src); + // No errors — Property/Properties cycle is fine (Tcl + // resolves at call time) and the triplet exists for the + // type-decl side. + assert!( + d.iter().all(|d| d.severity != Severity::Error), + "got: {:?}", + d + ); + } +} diff --git a/vw-ip/Cargo.toml b/vw-ip/Cargo.toml new file mode 100644 index 0000000..fb159ab --- /dev/null +++ b/vw-ip/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vw-ip" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +ipxact.workspace = true +vw-htcl = { path = "../vw-htcl" } +vw-quote = { path = "../vw-quote" } +thiserror.workspace = true +serde.workspace = true +quick-xml = { version = "0.37", features = ["serialize"] } + +[dev-dependencies] +tempfile.workspace = true diff --git a/vw-ip/src/cips_dict.rs b/vw-ip/src/cips_dict.rs new file mode 100644 index 0000000..fd8b81f --- /dev/null +++ b/vw-ip/src/cips_dict.rs @@ -0,0 +1,592 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Schema loader for Xilinx's `structured_tcldict` IP-XACT parameters. +//! +//! Some IP-XACT parameters in CIPS-family components are declared with +//! `structured_tcldict`: +//! the IP-XACT value is just an opaque space-separated `KEY VAL …` +//! dict string. The real schema for those inner fields lives in +//! out-of-band data files Vivado ships: +//! +//! - `versal/flows/automation/cipsToPsWiz_Porting/csv_files/` +//! - `param_mapping_direct.csv` — `(KEY, {DEFAULT}, …)` per row. +//! - `param_mapping_presets.csv` — preset-bundle layout for +//! 
`mode`-style selector fields like `CLOCK_MODE`, `BOOT_MODE`. +//! - `versal/cips_hip//guidata/ParamInfo.xml` — per-field +//! `` text, used as a doc comment. +//! - `versal/cips_hip//global/global_preset*.xml` and +//! `versal/cips_hip//presets/**/*.xml` — `` entries used to widen `@enum(…)` lists, same format +//! already parsed by [`crate::presets`]. +//! +//! We deliberately ignore the deprecated +//! `flows/automation/deprecated/cips_pswiz_key_and_value.csv` — its +//! content is a subset of the two supported CSVs above. + +use std::collections::{BTreeSet, HashMap}; +use std::fs; +use std::path::{Path, PathBuf}; + +#[derive(Debug, Clone)] +pub struct DictSchema { + pub fields: Vec, +} + +#[derive(Debug, Clone)] +pub struct DictField { + /// IP-XACT-style upper-snake name, e.g. `PCIE_APERTURES_DUAL_ENABLE`. + pub name: String, + /// Default value as recorded in the supporting data files. May be + /// empty when no default is known (rare); the generator treats it + /// the same as any other defaultless arg. + pub default: String, + /// Display-name / one-line description from `ParamInfo.xml`, when + /// present. + pub description: Option, + /// `@enum(…)` choices we were able to recover from preset files + /// or from `param_mapping_presets.csv`. Empty when no enum data + /// was found. + pub enum_values: BTreeSet, +} + +/// Returns the schema for each `structured_tcldict` parameter we can +/// find data for. Keys are the IP-XACT parameter names +/// (`PS_PMC_CONFIG`, …); the matching `_INTERNAL` variants point at +/// the same schema. +/// +/// Empty when the Xilinx `data/` ancestor can't be located. 
+pub fn load_schemas(component_path: &Path) -> HashMap { + let mut out = HashMap::new(); + let Some(data_root) = find_data_root(component_path) else { + return out; + }; + if let Some(schema) = load_ps_pmc_schema(&data_root) { + out.insert("PS_PMC_CONFIG".to_string(), schema.clone()); + out.insert("PS_PMC_CONFIG_INTERNAL".to_string(), schema); + } + out +} + +/// Inputs scoped to a CIPS `PS_PMC_CONFIG`. +fn load_ps_pmc_schema(data_root: &Path) -> Option { + let pspmc = data_root.join("versal/cips_hip/pspmc"); + let csv_dir = + data_root.join("versal/flows/automation/cipsToPsWiz_Porting/csv_files"); + if !pspmc.is_dir() || !csv_dir.is_dir() { + return None; + } + + let mut fields: HashMap = HashMap::new(); + parse_direct_csv(&csv_dir.join("param_mapping_direct.csv"), &mut fields); + parse_presets_csv(&csv_dir.join("param_mapping_presets.csv"), &mut fields); + + // Drop keys that belong to a different `structured_tcldict`. + fields.retain(|name, _| { + !name.starts_with("CPM_") + && !name.starts_with("XRAM_") + && !is_cips_toplevel(name) + }); + if fields.is_empty() { + return None; + } + + layer_param_info(&pspmc.join("guidata/ParamInfo.xml"), &mut fields); + layer_presets(&pspmc, &mut fields); + + let mut sorted: Vec = fields.into_values().collect(); + sorted.sort_by(|a, b| a.name.cmp(&b.name)); + Some(DictSchema { fields: sorted }) +} + +/// Names of `` entries that live at the top level of +/// the CIPS IP-XACT — we don't want to re-emit them as inner dict +/// fields. (Recovered by `vw ip generate` separately, but easier to +/// hard-code the small list than to thread the IP-XACT through here.) 
+fn is_cips_toplevel(name: &str) -> bool { + matches!( + name, + "AURORA_LINE_RATE_GPBS" + | "BOOT_SECONDARY_PCIE_ENABLE" + | "Component_Name" + | "GT_REFCLK_MHZ" + | "PMC_REF_CLK_FREQMHZ" + | "PS_PMC_CONFIG" + | "PS_PMC_CONFIG_INTERNAL" + | "PS_PMC_CONFIG_APPLIED" + | "CPM_CONFIG" + | "CPM_CONFIG_INTERNAL" + | "XRAM_CONFIG" + | "XRAM_CONFIG_INTERNAL" + ) +} + +/// `param_mapping_direct.csv` layout: `,{CIPS_DEFAULT},,{PSWIZ_DEFAULT}`. +/// The `{…}` value cells routinely contain commas (Tcl list syntax), +/// so we tokenize comma-separated columns at brace depth 0 rather than +/// splitting on every comma. Rows whose value has unbalanced braces +/// (the Xilinx CSV does ship a handful of those — line-wrapped or +/// truncated by the vendor) keep the field name but no default. +fn parse_direct_csv(path: &Path, fields: &mut HashMap) { + let Ok(text) = fs::read_to_string(path) else { + return; + }; + let text = text.strip_prefix('\u{feff}').unwrap_or(&text); + for line in text.lines() { + let cols = split_brace_aware(line); + let (Some(key), Some(raw_default)) = ( + cols.first().map(|s| s.trim()), + cols.get(1).map(|s| s.trim()), + ) else { + continue; + }; + if key.is_empty() || !is_safe_key(key) { + continue; + } + let stripped = unwrap_one_brace(raw_default); + let default = if braces_balanced(stripped) { + stripped.to_string() + } else { + String::new() + }; + fields.entry(key.to_string()).or_insert_with(|| DictField { + name: key.to_string(), + default, + description: None, + enum_values: BTreeSet::new(), + }); + } +} + +/// Split a CSV row on commas at brace depth 0. Treats `{` and `}` as +/// Tcl-style grouping characters so that `KEY,{a,b,c},…` splits into +/// three columns rather than five. 
+fn split_brace_aware(line: &str) -> Vec<&str> { + let mut out = Vec::new(); + let bytes = line.as_bytes(); + let mut start = 0usize; + let mut depth: i32 = 0; + for (i, b) in bytes.iter().enumerate() { + match b { + b'{' => depth += 1, + b'}' => depth -= 1, + b',' if depth == 0 => { + out.push(&line[start..i]); + start = i + 1; + } + _ => {} + } + } + out.push(&line[start..]); + out +} + +fn braces_balanced(s: &str) -> bool { + let mut depth: i32 = 0; + for b in s.bytes() { + match b { + b'{' => depth += 1, + b'}' => { + depth -= 1; + if depth < 0 { + return false; + } + } + _ => {} + } + } + depth == 0 +} + +/// `param_mapping_presets.csv` layout: a single header row lists +/// preset-selector field names (e.g. `BOOT_MODE,CLOCK_MODE,…`); each +/// data row has the selector name in column 0 and one valid value in +/// column 1. The CSV repeats the header row between sections — we +/// detect those repeats and skip them so the header names don't get +/// mistakenly recorded as values of each other. +fn parse_presets_csv(path: &Path, fields: &mut HashMap) { + let Ok(text) = fs::read_to_string(path) else { + return; + }; + let text = text.strip_prefix('\u{feff}').unwrap_or(&text); + let mut lines = text.lines().filter(|l| !l.trim().is_empty()); + let Some(header_line) = lines.next() else { + return; + }; + let headers: BTreeSet = header_line + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty() && is_safe_key(s)) + .map(str::to_string) + .collect(); + + let mut by_key: HashMap> = HashMap::new(); + for line in lines { + let cols: Vec<&str> = line.split(',').map(str::trim).collect(); + let Some(first) = cols.first().filter(|c| !c.is_empty()) else { + continue; + }; + // Skip repeated header rows: every non-empty cell is itself a + // declared header name. 
+ let is_header_row = + cols.iter().all(|c| c.is_empty() || headers.contains(*c)); + if is_header_row { + continue; + } + if !headers.contains(*first) { + continue; + } + let Some(val) = cols.get(1).filter(|v| !v.is_empty()) else { + continue; + }; + by_key + .entry((*first).to_string()) + .or_default() + .insert(unwrap_one_brace(val).to_string()); + } + + for name in headers { + let mut enums = by_key.remove(&name).unwrap_or_default(); + // Vivado UI convention: preset-selector fields always offer + // `Custom` as the "configure each inner field manually" choice + // even when the CSV doesn't enumerate it. It's also the most + // useful default — picking a preset bundle locks the inner + // fields, picking `Custom` lets the user override them. + enums.insert("Custom".to_string()); + let default = "Custom".to_string(); + let f = fields.entry(name.clone()).or_insert_with(|| DictField { + name: name.clone(), + default: default.clone(), + description: None, + enum_values: BTreeSet::new(), + }); + for v in enums { + f.enum_values.insert(v); + } + } +} + +/// Strip one layer of Tcl-style braces from a value if present: +/// `{0}` → `0`, `{{ENABLE 0}}` → `{ENABLE 0}`. Leaves unbalanced or +/// unbraced inputs alone. +fn unwrap_one_brace(s: &str) -> &str { + let s = s.trim(); + if s.len() >= 2 && s.starts_with('{') && s.ends_with('}') { + // Verify the outer braces actually pair with each other (i.e. + // depth reaches 0 only at the final `}`). + let mut depth: i32 = 0; + for (i, b) in s.bytes().enumerate() { + match b { + b'{' => depth += 1, + b'}' => { + depth -= 1; + if depth == 0 && i != s.len() - 1 { + return s; // not a single outer pair + } + } + _ => {} + } + } + return &s[1..s.len() - 1]; + } + s +} + +/// Conservative IP-XACT-style identifier check: starts with a letter +/// or `_`, then alphanumerics / `_`. Anything else is data we don't +/// understand and should ignore (rather than mistake for a field). 
+fn is_safe_key(s: &str) -> bool { + let mut chars = s.chars(); + let Some(c0) = chars.next() else { + return false; + }; + if !(c0.is_ascii_alphabetic() || c0 == '_') { + return false; + } + chars.all(|c| c.is_ascii_alphanumeric() || c == '_') +} + +/// Layer `X` text from a `ParamInfo.xml` +/// onto matching field descriptions. Uses a tiny line-oriented scan +/// — the schema is too irregular to demand a full XML parser. +fn layer_param_info(path: &Path, fields: &mut HashMap) { + let Ok(text) = fs::read_to_string(path) else { + return; + }; + let mut current: Option = None; + for line in text.lines() { + if let Some(start) = line.find("") { + let after = &line[start + "".len()..]; + if let Some(end) = after.find("") { + let text = after[..end].trim(); + if !text.is_empty() { + if let Some(f) = fields.get_mut(name) { + f.description = Some(text.to_string()); + } + } + } + } + if line.contains("") { + current = None; + } + } + } +} + +/// Layer enum values from preset XML files onto matching fields. +/// We only consume `` entries — those are +/// genuine selector-style enumerations (BOOT_MODE, SMON_ALARMS, …) +/// where Vivado's UI offers a fixed dropdown. The XMLs also contain +/// `` entries, but those are concrete +/// values *applied* by a parent preset; they're not an exhaustive +/// list of valid values. For a numeric field like +/// `PMC_CRP_PL0_REF_CTRL_FREQMHZ` the user can supply any frequency +/// the clock generator can synthesize (e.g. `250`, `195`), so +/// treating `` values as an `@enum` would wrongly reject those. 
+fn layer_presets(pspmc_dir: &Path, fields: &mut HashMap) { + let mut paths = Vec::new(); + paths.push(pspmc_dir.join("global/global_preset.xml")); + paths.push(pspmc_dir.join("global/global_presetForNonPS.xml")); + walk_for_xml(&pspmc_dir.join("presets"), &mut paths); + + for p in paths { + let Ok(text) = fs::read_to_string(&p) else { + continue; + }; + for line in text.lines() { + if let Some((param, val)) = + extract_two_attrs(line, "` works). +fn extract_two_attrs<'a>( + line: &'a str, + tag: &str, + attr_a: &str, + attr_b: &str, +) -> Option<(&'a str, &'a str)> { + let tag_idx = line.find(tag)?; + let after_tag = &line[tag_idx + tag.len()..]; + let (a, rest) = scan_attr(after_tag, attr_a)?; + let (b, _) = scan_attr(rest, attr_b)?; + Some((a, b)) +} + +fn scan_attr<'a>(s: &'a str, name: &str) -> Option<(&'a str, &'a str)> { + let needle_eq = format!("{name}=\""); + let idx = s.find(&needle_eq)?; + let after = &s[idx + needle_eq.len()..]; + let end = after.find('"')?; + Some((&after[..end], &after[end + 1..])) +} + +fn walk_for_xml(dir: &Path, out: &mut Vec) { + let Ok(entries) = fs::read_dir(dir) else { + return; + }; + for e in entries.flatten() { + let path = e.path(); + if path.is_dir() { + walk_for_xml(&path, out); + } else if path.extension().and_then(|s| s.to_str()) == Some("xml") { + out.push(path); + } + } +} + +/// Walk up `start` looking for an ancestor literally named `data`. +fn find_data_root(start: &Path) -> Option { + for ancestor in start.ancestors() { + if ancestor.file_name().and_then(|s| s.to_str()) == Some("data") { + return Some(ancestor.to_path_buf()); + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn split_brace_aware_respects_tcl_groups() { + assert_eq!(split_brace_aware("a,b,c"), vec!["a", "b", "c"]); + // Commas inside `{…}` stay attached to that cell. + assert_eq!( + split_brace_aware("KEY,{a,b,c},NEXT"), + vec!["KEY", "{a,b,c}", "NEXT"] + ); + // Nested braces. 
+ assert_eq!( + split_brace_aware("K,{{x,y} {z,w}},end"), + vec!["K", "{{x,y} {z,w}}", "end"] + ); + } + + #[test] + fn parse_direct_csv_drops_unbalanced_brace_defaults() { + use std::io::Write; + let f = tempfile::NamedTempFile::new().unwrap(); + // First row is well-formed; second has an unterminated brace + // group (matches a real bug in Xilinx's vendor CSV). + writeln!(&f, "GOOD_KEY,{{0}},GOOD_KEY,{{0}}").unwrap(); + writeln!(&f, "BAD_KEY,{{a,b,c").unwrap(); + let mut m: HashMap = HashMap::new(); + parse_direct_csv(f.path(), &mut m); + assert_eq!(m["GOOD_KEY"].default, "0"); + // BAD_KEY is recorded as a field but with no default. + assert!(m.contains_key("BAD_KEY")); + assert_eq!(m["BAD_KEY"].default, ""); + } + + #[test] + fn unwrap_one_brace_strips_outer_pair_only() { + assert_eq!(unwrap_one_brace("{0}"), "0"); + assert_eq!(unwrap_one_brace("{{ENABLE 0}}"), "{ENABLE 0}"); + assert_eq!(unwrap_one_brace("Custom"), "Custom"); + // Unbalanced — leave alone. + assert_eq!(unwrap_one_brace("{a"), "{a"); + // Two adjacent groups — not a single outer pair. + assert_eq!(unwrap_one_brace("{a}{b}"), "{a}{b}"); + } + + #[test] + fn safe_key_rejects_anything_with_braces_or_special_chars() { + assert!(is_safe_key("PS_USE_PMCPL_CLK0")); + assert!(is_safe_key("_misc")); + assert!(!is_safe_key("{0}")); + assert!(!is_safe_key("0_LEADS_WITH_DIGIT")); + assert!(!is_safe_key("")); + assert!(!is_safe_key("has space")); + } + + #[test] + fn discovery_returns_empty_outside_xilinx_layout() { + let dir = tempfile::tempdir().unwrap(); + let loose = dir.path().join("component.xml"); + std::fs::write(&loose, "").unwrap(); + assert!(load_schemas(&loose).is_empty()); + } + + /// Build a tempdir mimicking the Xilinx layout closely enough to + /// exercise the loader end-to-end without a Vivado install. 
+ #[test] + fn loads_minimum_viable_schema_from_synthetic_layout() { + let dir = tempfile::tempdir().unwrap(); + let data = dir.path().join("data"); + + let pspmc = data.join("versal/cips_hip/pspmc"); + let csvs = + data.join("versal/flows/automation/cipsToPsWiz_Porting/csv_files"); + let global = pspmc.join("global"); + let guidata = pspmc.join("guidata"); + let presets = pspmc.join("presets"); + std::fs::create_dir_all(&csvs).unwrap(); + std::fs::create_dir_all(&global).unwrap(); + std::fs::create_dir_all(&guidata).unwrap(); + std::fs::create_dir_all(&presets).unwrap(); + + std::fs::write( + csvs.join("param_mapping_direct.csv"), + "\ +PCIE_APERTURES_DUAL_ENABLE,{0},PCIE_APERTURES_DUAL_ENABLE,{0} +PS_PCIE_RESET,{{ENABLE 0}},PS_PCIE_RESET,{ENABLE 0 IO PS_MIO_18:19} +SMON_ALARMS,{Set_Alarms_On},SMON_ALARMS,{Set_Alarms_On} +CPM_PCIE0_MODES,{None},CPM_PCIE0_MODES,{None} +", + ) + .unwrap(); + std::fs::write( + csvs.join("param_mapping_presets.csv"), + "\ +BOOT_MODE,CLOCK_MODE +BOOT_MODE,JTAG Boot +BOOT_MODE,Master Mode +CLOCK_MODE,Custom +CLOCK_MODE,REF CLK 33.33 MHz +", + ) + .unwrap(); + std::fs::write( + guidata.join("ParamInfo.xml"), + r#" + + + What do you want to do with Alarms? + + +"#, + ) + .unwrap(); + std::fs::write( + presets.join("sysmon.xml"), + r#" + + + +"#, + ) + .unwrap(); + // Empty global presets so the loader still finds the file. 
+ std::fs::write(global.join("global_preset.xml"), "").unwrap(); + + let component = data.join("ip/xilinx/versal_cips_v3_4/component.xml"); + std::fs::create_dir_all(component.parent().unwrap()).unwrap(); + std::fs::write(&component, "").unwrap(); + + let schemas = load_schemas(&component); + assert!( + schemas.contains_key("PS_PMC_CONFIG"), + "schemas: {schemas:?}" + ); + assert!(schemas.contains_key("PS_PMC_CONFIG_INTERNAL")); + let s = &schemas["PS_PMC_CONFIG"]; + let by_name: HashMap<&str, &DictField> = + s.fields.iter().map(|f| (f.name.as_str(), f)).collect(); + // From direct.csv (with CPM_ filtered out): + assert!(by_name.contains_key("PCIE_APERTURES_DUAL_ENABLE")); + assert_eq!(by_name["PCIE_APERTURES_DUAL_ENABLE"].default, "0"); + assert_eq!( + by_name["PS_PCIE_RESET"].default, "{ENABLE 0}", + "should strip one brace layer" + ); + // CPM_ keys are filtered out. + assert!(!by_name.contains_key("CPM_PCIE0_MODES")); + // From ParamInfo: description present for SMON_ALARMS. + assert_eq!( + by_name["SMON_ALARMS"].description.as_deref(), + Some("What do you want to do with Alarms?") + ); + // From presets: enum widened. + assert!(by_name["SMON_ALARMS"].enum_values.contains("Set_Alarms_On")); + assert!(by_name["SMON_ALARMS"] + .enum_values + .contains("Set_Alarms_Off")); + // From presets.csv: CLOCK_MODE present with "Custom" as default. + assert!(by_name.contains_key("CLOCK_MODE")); + assert_eq!(by_name["CLOCK_MODE"].default, "Custom"); + assert!(by_name["CLOCK_MODE"].enum_values.contains("Custom")); + // BOOT_MODE's CSV row only lists "JTAG Boot" and "Master Mode" — + // we should still inject `Custom` automatically (Vivado convention). 
+ assert_eq!(by_name["BOOT_MODE"].default, "Custom"); + assert!(by_name["BOOT_MODE"].enum_values.contains("Custom")); + assert!(by_name["BOOT_MODE"].enum_values.contains("JTAG Boot")); + } +} diff --git a/vw-ip/src/generate.rs b/vw-ip/src/generate.rs new file mode 100644 index 0000000..2f24b63 --- /dev/null +++ b/vw-ip/src/generate.rs @@ -0,0 +1,1041 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Emit an htcl wrapper proc for an IP-XACT component. +//! +//! Two shapes, picked by `split_threshold`: +//! +//! - **Single-proc** (small IPs like CIPS with 19 params): one +//! `create_` proc whose structured args mirror the IP's +//! parameters. Each arg gets `@default()` from the IP-XACT +//! default; `@enum(...)` when the parameter has a `choiceRef`. The +//! body emits `set_property -dict [list ...]` mapping each arg back +//! to its `CONFIG.` key. +//! +//! - **Split** (large IPs like CPM5 with ~8700 params): a top +//! `create_` proc that just creates the bd_cell and returns +//! its handle, plus one `create__` sub-proc per +//! parameter prefix group. Each sub-proc takes the cell handle as +//! its first arg, then its own group's parameters. Small groups +//! (< `min_group_size`) collapse into a `_misc` sub-proc so we end +//! up with a manageable handful rather than dozens of singletons. +//! +//! Call-site composition: +//! ```tcl +//! set cps [create_cpm5 cps] +//! create_cpm5_pcie1 $cps -max_link_speed "32.0_GT/s" -modes PCIE +//! ``` + +use std::fmt::Write; + +use ipxact::{Component, Parameter}; +use vw_htcl::emit::{Command, Doc, Item, Word}; + +use crate::tree::{build_tree, strip_prefix, Node, TreeOptions}; + +#[derive(Clone, Debug)] +pub struct GenerateOptions { + /// Emit `## ` doc comments for parameters that have a + /// description in IP-XACT. 
+ pub include_descriptions: bool, + /// Skip auto-resolve parameters; emit only user-configurable ones. + pub user_configurable_only: bool, + /// When the parameter count exceeds this, the generator emits a + /// hierarchy of procs instead of one. Tuned so CIPS (19) stays + /// single and CPM5 (8673) splits. + pub split_threshold: usize, + /// Don't split a subgroup into its own child proc unless it has at + /// least this many parameters. Smaller subgroups stay as direct + /// args of the parent so we don't get a long tail of singleton + /// procs. + pub min_split_size: usize, +} + +impl Default for GenerateOptions { + fn default() -> Self { + Self { + include_descriptions: true, + user_configurable_only: true, + split_threshold: 100, + min_split_size: 8, + } + } +} + +/// Generate the htcl wrapper text for `component`. +/// +/// `presets` carries supplementary parameter-value information loaded +/// from out-of-band sources (e.g. Vivado's `cpm_preset*.xml`); pass an +/// empty map when there are none. Values from `presets` are merged +/// with the IP-XACT `` entries when emitting `@enum(...)`. +pub fn generate( + component: &Component, + presets: &crate::presets::PresetMap, + dict_schemas: &std::collections::HashMap, + opts: &GenerateOptions, +) -> String { + let parameters: Vec<&Parameter> = component + .component_parameters() + .filter(|p| { + !opts.user_configurable_only || p.value.is_user_configurable() + }) + .collect(); + let mut out = if parameters.len() > opts.split_threshold { + generate_split(component, presets, ¶meters, opts) + } else { + generate_single(component, presets, ¶meters, opts) + }; + if !dict_schemas.is_empty() { + append_dict_sub_procs(&mut out, component, dict_schemas, opts); + } + out +} + +/// Append `create__` sub-procs — one for each IP-XACT +/// `structured_tcldict` parameter we have a schema for. Each builds a +/// Tcl dict-list of `KEY VALUE` pairs the user passes back into the +/// top proc's `-` argument. 
+fn append_dict_sub_procs( + out: &mut String, + component: &Component, + dict_schemas: &std::collections::HashMap, + opts: &GenerateOptions, +) { + let ip_name = sanitize_ident(&component.name); + let top_proc = format!("create_{ip_name}"); + let mut keys: Vec<&String> = dict_schemas.keys().collect(); + keys.sort(); + for param_name in keys { + let schema = &dict_schemas[param_name]; + if schema.fields.is_empty() { + continue; + } + writeln!(out).unwrap(); + emit_dict_sub_proc(out, &top_proc, param_name, schema, opts); + } +} + +fn emit_dict_sub_proc( + out: &mut String, + top_proc: &str, + param_name: &str, + schema: &crate::DictSchema, + opts: &GenerateOptions, +) { + let suffix = sanitize_ident(¶m_name.to_ascii_lowercase()); + let sub_name = format!("{top_proc}_{suffix}"); + + let mut doc = Doc::new(); + doc.push(Item::DocComment(format!( + "Apply a `CONFIG.{param_name}` value to a block-design cell.", + ))); + doc.push(Item::DocComment(format!( + "Pass the cell handle returned by `{top_proc}`.", + ))); + doc.push(Item::Blank); + doc.push(Item::DocComment( + "Block-design cell to set the property on.".into(), + )); + doc.push(Item::Command(Command { + doc_comments: Vec::new(), + // `cell: bd_cell` — typed arg. The parser tokenizes the + // ident `cell`, the `:` separator, and the type word + // `bd_cell` separately; emitting them as two adjacent + // bare words renders the source the way a human would + // write it (`cell: bd_cell`). + words: vec![Word::Bare("cell:".into()), Word::Bare("bd_cell".into())], + body: None, + })); + if !schema.fields.is_empty() { + doc.push(Item::Blank); + } + for f in &schema.fields { + emit_dict_field_arg(&mut doc, f, opts); + } + + // Build the inner CONFIG. dict conditionally so unsupplied + // args never reach Vivado. Unconditionally emitting every field + // (even at its declared default) re-validates the whole cell and + // Vivado rejects values whose defaults happen to be out-of-range + // for the cell's current state — e.g. 
`STRADDLE_SIZE=0` when the + // valid enum is `{None, 2_TLP}`. The `__vw_kw__set` flag is + // set by `::vw::kwargs` (shim helper) only when the user passed + // a value for that arg, so the test filters out the defaults. + let mut body = String::new(); + writeln!(body, "set _vw_inner [list]").unwrap(); + for f in &schema.fields { + let arg = lowercase_ident(&f.name); + writeln!( + body, + "if {{${{__vw_kw_{arg}_set}}}} \ + {{ lappend _vw_inner {} ${arg} }}", + f.name + ) + .unwrap(); + } + writeln!(body, "if {{[llength $_vw_inner] > 0}} {{").unwrap(); + writeln!( + body, + " vivado_cmd::set_property -dict \ + [list CONFIG.{param_name} $_vw_inner] -objects $cell" + ) + .unwrap(); + writeln!(body, "}}").unwrap(); + // Dict-sub procs configure an existing cell; they don't + // produce a new one. Return type is `unit` so the REPL + // suppresses the (meaningless) empty-string result. + emit_proc(out, &sub_name, &doc, Some("unit"), &body); +} + +fn emit_dict_field_arg( + doc: &mut Doc, + f: &crate::DictField, + opts: &GenerateOptions, +) { + if opts.include_descriptions { + if let Some(desc) = f.description.as_deref().filter(|s| !s.is_empty()) { + for line in desc.lines() { + doc.push(Item::DocComment(line.trim_end().into())); + } + } + } + let mut words = Vec::new(); + if !f.enum_values.is_empty() { + let formatted: Vec = f + .enum_values + .iter() + .map(|v| format_attribute_value(v)) + .collect(); + words.push(Word::Raw(format!("@enum({})", formatted.join(", ")))); + } + // Dict fields are always optional: Vivado treats an unset + // inner key as "use the IP's implicit default", so make every + // arg defaultable. When the Xilinx CSV didn't yield a default — + // either the row was missing one or the value had unbalanced + // braces and was rejected — fall back to an empty string so the + // user can omit the arg and let Vivado decide. 
+ words.push(Word::Raw(format!( + "@default({})", + format_attribute_value(&f.default) + ))); + let lowered = lowercase_ident(&f.name); + words.push(Word::Bare(lowered)); + doc.push(Item::Command(Command { + doc_comments: Vec::new(), + words, + body: None, + })); +} + +// --------------------------------------------------------------------------- +// Single-proc shape. +// --------------------------------------------------------------------------- + +fn generate_single( + component: &Component, + presets: &crate::presets::PresetMap, + parameters: &[&Parameter], + opts: &GenerateOptions, +) -> String { + let vlnv = component.vlnv(); + let proc_name = format!("create_{}", sanitize_ident(&component.name)); + + let mut out = String::new(); + emit_file_header(&mut out, component, &vlnv); + writeln!( + out, + "## ({} configurable parameter{})", + parameters.len(), + if parameters.len() == 1 { "" } else { "s" } + ) + .unwrap(); + + let mut proc_doc = Doc::new(); + proc_doc.push(Item::DocComment( + "Instance name in the block design.".into(), + )); + proc_doc.push(Item::Command(Command::call( + "name", + std::iter::empty::(), + ))); + if !parameters.is_empty() { + proc_doc.push(Item::Blank); + } + for p in parameters { + emit_arg_decl(&mut proc_doc, component, presets, p, opts, ""); + } + + let body = build_single_body(&vlnv, parameters); + // The single-shape proc creates a bd_cell and `return $cell`s. + emit_proc(&mut out, &proc_name, &proc_doc, Some("bd_cell"), &body); + out +} + +fn build_single_body(vlnv: &str, parameters: &[&Parameter]) -> String { + let mut out = String::new(); + writeln!( + out, + "set cell [vivado_cmd::create_bd_cell -type ip -vlnv {vlnv} -name $name]" + ) + .unwrap(); + if !parameters.is_empty() { + write_set_property_dict(&mut out, parameters, ""); + } + // Every `create_` proc must return the cell handle so the + // caller can pass it back into the IP's sub-procs (or any other + // wrapper that takes `-cell $x`). 
Without the explicit return, + // the proc returns whatever its last statement returned — which + // is `""` for the empty-conditional-dict shape, breaking + // downstream callers with cryptic `Missing value for option + // 'objects'` errors. + writeln!(out, "return $cell").unwrap(); + out +} + +// --------------------------------------------------------------------------- +// Split shape: top proc + one sub-proc per prefix group. +// --------------------------------------------------------------------------- + +fn generate_split( + component: &Component, + presets: &crate::presets::PresetMap, + parameters: &[&Parameter], + opts: &GenerateOptions, +) -> String { + let vlnv = component.vlnv(); + let ip_name = sanitize_ident(&component.name); + let top_proc = format!("create_{ip_name}"); + + let tree = build_tree( + parameters.iter().copied(), + &TreeOptions { + min_split_size: opts.min_split_size, + }, + ); + + // Collect every node that will emit a proc — the root for the + // top-level `create_` and every non-root node that has at least + // one direct parameter to configure. + let all_nodes = tree.collect(); + let emit_nodes: Vec<&Node> = all_nodes + .iter() + .copied() + .filter(|n| n.label.is_empty() || !n.direct.is_empty()) + .collect(); + + let mut out = String::new(); + emit_file_header(&mut out, component, &vlnv); + writeln!( + out, + "## {} configurable parameter{} across {} proc{}.", + parameters.len(), + if parameters.len() == 1 { "" } else { "s" }, + emit_nodes.len(), + if emit_nodes.len() == 1 { "" } else { "s" } + ) + .unwrap(); + writeln!(out, "##").unwrap(); + writeln!(out, "## Usage:").unwrap(); + writeln!(out, "## set cell [{top_proc} ]").unwrap(); + writeln!( + out, + "## $cell - ... ;# tab-complete by prefix" + ) + .unwrap(); + + // Top proc: creates the cell and returns it. Any tree-root direct + // params live here too — though for IPs whose params all share a + // common first segment (CPM5, CIPS), the root has none. 
+ let mut top_doc = Doc::new(); + top_doc.push(Item::DocComment( + "Instance name in the block design.".into(), + )); + top_doc.push(Item::Command(Command::call( + "name", + std::iter::empty::(), + ))); + if !tree.direct.is_empty() { + top_doc.push(Item::Blank); + for p in &tree.direct { + emit_arg_decl(&mut top_doc, component, presets, p, opts, ""); + } + } + let mut top_body = format!( + "set cell [vivado_cmd::create_bd_cell -type ip -vlnv {vlnv} -name $name]\n" + ); + if !tree.direct.is_empty() { + write_set_property_dict(&mut top_body, &tree.direct, ""); + } + writeln!(top_body, "return $cell").unwrap(); + // Top split-shape proc: creates the bd_cell. + emit_proc(&mut out, &top_proc, &top_doc, Some("bd_cell"), &top_body); + + // One proc per non-root node that has direct parameters. + for n in emit_nodes.iter().filter(|n| !n.label.is_empty()) { + writeln!(out).unwrap(); + let suffix = sanitize_ident(&n.label.to_ascii_lowercase()); + let sub_name = format!("{top_proc}_{suffix}"); + + let mut sub_doc = Doc::new(); + sub_doc.push(Item::DocComment(format!( + "Block-design cell handle returned by `{top_proc}`.", + ))); + sub_doc.push(Item::Command(Command::call( + "cell:", + std::iter::once(Word::Bare("bd_cell".into())), + ))); + if !n.direct.is_empty() { + sub_doc.push(Item::Blank); + } + for p in &n.direct { + emit_arg_decl(&mut sub_doc, component, presets, p, opts, &n.label); + } + + let mut body = String::new(); + write_set_property_dict(&mut body, &n.direct, &n.label); + // Return the cell the user passed in. Lets `set x [create__ + // -cell $cell ...]` round-trip the handle for downstream calls and + // avoids `$x = ""` when the conditional-dict had zero supplied args. + writeln!(body, "return $cell").unwrap(); + // Sub-procs propagate the bd_cell they were handed. + emit_proc(&mut out, &sub_name, &sub_doc, Some("bd_cell"), &body); + } + + out +} + +// --------------------------------------------------------------------------- +// Shared helpers. 
+// --------------------------------------------------------------------------- + +fn emit_file_header(out: &mut String, component: &Component, vlnv: &str) { + // Pull in the whole `vivado-cmd` library — the body uses + // `vivado_cmd::create_bd_cell` and `vivado_cmd::set_property` + // alongside `ip::check`, so `src @vivado-cmd/ip` (just the + // ip sub-module) leaves those references unresolved. The + // analyzer reports the unbound calls; sourcing the full + // package brings everything the emitted body actually uses + // into scope. + writeln!(out, "src @vivado-cmd").unwrap(); + writeln!(out).unwrap(); + writeln!(out, "ip::check -name \"{vlnv}\"").unwrap(); + writeln!(out).unwrap(); + if let Some(desc) = + component.description.as_deref().filter(|s| !s.is_empty()) + { + // Split the IP-XACT description into a one-sentence summary + // plus body so an LSP client can show a short blurb on hover + // / completion without repeating it in the documentation + // popup. Same shape as the cmd-doc generator. + let raw: Vec = + desc.lines().map(|l| l.trim_end().to_string()).collect(); + let summary = vw_htcl::doc::brief(&raw); + let extended = vw_htcl::doc::extended(&raw); + if let Some(s) = summary { + for line in vw_htcl::doc::wrap_paragraph(&s, 78) { + writeln!(out, "## {line}").unwrap(); + } + } + if let Some(body) = extended { + for paragraph in body.split("\n\n") { + writeln!(out, "##").unwrap(); + for line in vw_htcl::doc::wrap_paragraph(paragraph, 78) { + writeln!(out, "## {line}").unwrap(); + } + } + } + writeln!(out, "##").unwrap(); + } + writeln!(out, "## Source IP-XACT: {vlnv}").unwrap(); +} + +/// Emit `proc { } ? { }` with the args +/// and body indented two spaces each. When `return_type` is Some, +/// emits it as the 4th htcl word between args and body. 
+fn emit_proc( + out: &mut String, + name: &str, + args: &Doc, + return_type: Option<&str>, + body: &str, +) { + let args_text = args.to_string(); + writeln!(out, "proc {name} {{").unwrap(); + for line in args_text.lines() { + if line.is_empty() { + writeln!(out).unwrap(); + } else { + writeln!(out, " {line}").unwrap(); + } + } + match return_type { + Some(ty) => { + let needs_brace = ty.chars().any(char::is_whitespace); + if needs_brace { + writeln!(out, "}} {{{ty}}} {{").unwrap(); + } else { + writeln!(out, "}} {ty} {{").unwrap(); + } + } + None => { + writeln!(out, "}} {{").unwrap(); + } + } + for line in body.lines() { + if line.is_empty() { + writeln!(out).unwrap(); + } else { + writeln!(out, " {line}").unwrap(); + } + } + writeln!(out, "}}").unwrap(); +} + +/// Emit `set_property -dict [list \ … ]` for `parameters`. Arg names +/// are built by stripping `prefix_to_strip` from each parameter's full +/// IP-XACT name (so a `CPM_PCIE1_PF0_BAR0_ENABLED` parameter inside a +/// proc scoped at `CPM_PCIE1_PF0_BAR0` reads back as `$enabled`), +/// while the `CONFIG.` key keeps the full name Vivado expects. +fn write_set_property_dict( + out: &mut String, + parameters: &[&Parameter], + prefix_to_strip: &str, +) { + // Build the dict conditionally so only user-supplied args reach + // Vivado. See `emit_dict_proc` for the rationale — unconditionally + // setting all CONFIG.* properties re-validates the whole cell and + // Vivado rejects values whose declared defaults happen to be + // out-of-range for the cell's current state. The + // `__vw_kw__set` flag is set by `::vw::kwargs` (shim helper) + // only when the user passed a value for that arg. + writeln!(out, "set _vw_d [list]").unwrap(); + for p in parameters { + let arg = lowercase_ident(strip_prefix(&p.name, prefix_to_strip)); + // Properties-typed args (paired-dict-shaped defaults; see + // [`emit_arg_decl`]) get unwrapped through `Properties::to_raw` + // before flowing into `set_property -dict`. 
Vivado expects + // a bare paired-list of keys + values for CONFIG.* dict + // slots — without the unwrap, the tagged tuple + // (`Nested {... Scalar X ...}`) would be passed verbatim. + let value_expr = if is_properties_shaped(p.value.default_value()) { + format!("[Properties::to_raw -v ${arg}]") + } else { + format!("${arg}") + }; + writeln!( + out, + "if {{${{__vw_kw_{arg}_set}}}} \ + {{ lappend _vw_d CONFIG.{} {value_expr} }}", + p.name + ) + .unwrap(); + } + writeln!(out, "if {{[llength $_vw_d] > 0}} {{").unwrap(); + writeln!( + out, + " vivado_cmd::set_property -dict $_vw_d -objects $cell" + ) + .unwrap(); + writeln!(out, "}}").unwrap(); +} + +fn emit_arg_decl( + doc: &mut Doc, + component: &Component, + presets: &crate::presets::PresetMap, + p: &Parameter, + opts: &GenerateOptions, + prefix_to_strip: &str, +) { + if opts.include_descriptions { + if let Some(desc) = p.description.as_deref().filter(|s| !s.is_empty()) { + for line in desc.lines() { + doc.push(Item::DocComment(line.trim_end().into())); + } + } + } + let mut words = Vec::new(); + let enum_values = enum_values_for(component, presets, p); + if !enum_values.is_empty() { + let formatted: Vec = enum_values + .iter() + .map(|v| format_attribute_value(v)) + .collect(); + words.push(Word::Raw(format!("@enum({})", formatted.join(", ")))); + } + let default = p.value.default_value(); + if !default.is_empty() { + words.push(Word::Raw(format!( + "@default({})", + format_attribute_value(default) + ))); + } + let lowered = lowercase_ident(strip_prefix(&p.name, prefix_to_strip)); + // Type the arg as `Properties` when the default value parses as + // a paired-dict (even-length list with identifier keys) — + // Vivado's IP-customization slots like `cpm_config` / `ps_pmc_config` + // consume a CONFIG.* dict, and typing them as `Properties` + // lets callers pass typed values from `util::props` / + // dict-traversal. 
The wrapper body wraps the arg with + // `Properties::to_raw` at the `set_property -dict` call + // (see [`write_set_property_dict`]) so the boundary + // strips the tags before Vivado sees them. + let typed_name = if is_properties_shaped(default) { + format!("{lowered}: Properties") + } else { + lowered + }; + words.push(Word::Bare(typed_name)); + doc.push(Item::Command(Command { + doc_comments: Vec::new(), + words, + body: None, + })); +} + +/// Union of the parameter's IP-XACT `` values and any +/// presets, in insertion order. Order is *IP-XACT first* (preserving +/// the vendor's intended ordering when both sources agree) followed +/// by preset-only values; duplicates are filtered. +fn enum_values_for( + component: &Component, + presets: &crate::presets::PresetMap, + p: &Parameter, +) -> Vec { + let mut seen = std::collections::HashSet::new(); + let mut out: Vec = Vec::new(); + if let Some(choice) = p + .value + .choice_ref + .as_deref() + .and_then(|name| component.find_choice(name)) + { + for e in &choice.enumerations { + if seen.insert(e.value.clone()) { + out.push(e.value.clone()); + } + } + } + if let Some(extra) = presets.get(&p.name) { + for v in extra { + if seen.insert(v.clone()) { + out.push(v.clone()); + } + } + } + out +} + +/// True when the default value parses as a paired-dict Tcl-list +/// shape — i.e. has an even number of whitespace-separated tokens +/// (≥ 2) with identifier-shaped keys at every even index. Used by +/// [`emit_arg_decl`] / [`write_set_property_dict`] to decide which +/// wrapper args get the typed `Properties` annotation + automatic +/// `Properties::to_raw` unwrap at the extern boundary. +/// +/// Vivado IP-XACT defaults for CONFIG.* dict slots typically look +/// like `"KEY1 VAL1 KEY2 VAL2"` (e.g. `CPM_PCIE0_MODES None`, +/// `SMON_ALARMS Set_Alarms_On SMON_ENABLE_TEMP_AVERAGING 0`), +/// while scalar params look like `Custom` or `0` or +/// `versal_cips_v3_4`. 
/// Report whether `default` looks like a flat key/value ("paired-dict")
/// Tcl list.
///
/// The string qualifies when it splits on whitespace into a non-zero,
/// even number of tokens and every key (tokens 0, 2, 4, …) starts with
/// an ASCII letter or `_` and continues with ASCII alphanumerics, `_`
/// or `.`. Values are not inspected.
///
/// A scalar default that happens to consist of such word pairs is also
/// reported as properties-shaped; that false positive is accepted.
fn is_properties_shaped(default: &str) -> bool {
    let words: Vec<&str> = default.split_whitespace().collect();
    let pairs = words.chunks_exact(2);
    // No tokens at all, or a dangling key without a value, is not a dict.
    if words.is_empty() || !pairs.remainder().is_empty() {
        return false;
    }
    pairs.map(|pair| pair[0]).all(|key| {
        let mut rest = key.chars();
        matches!(rest.next(), Some(c) if c.is_ascii_alphabetic() || c == '_')
            && rest.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
    })
}

/// Turn an IP-XACT parameter name into an htcl argument name.
///
/// ASCII letters are lowercased, ASCII digits and `_` are kept, and
/// every other character becomes `_`. If the result is empty or starts
/// with a digit (prefix stripping can leave names such as `64BIT`), a
/// leading `_` is added so the name matches
/// `/[a-zA-Z_][a-zA-Z0-9_]*/`.
fn lowercase_ident(name: &str) -> String {
    let mut ident: String = name
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || c == '_' {
                c.to_ascii_lowercase()
            } else {
                '_'
            }
        })
        .collect();
    let starts_cleanly =
        matches!(ident.as_bytes().first(), Some(b) if !b.is_ascii_digit());
    if !starts_cleanly {
        ident.insert(0, '_');
    }
    ident
}
/// Replace every character other than an ASCII letter, ASCII digit or
/// `_` with `_`. Case is preserved and nothing is added or removed, so
/// an empty or digit-leading input stays empty or digit-leading.
fn sanitize_ident(s: &str) -> String {
    s.chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || c == '_' {
                c
            } else {
                '_'
            }
        })
        .collect()
}

/// Render `s` the way it should appear inside an `@default(...)` or
/// `@enum(...)` attribute.
///
/// Integer literals and plain identifiers are passed through untouched;
/// anything else is wrapped in double quotes with `\` and `"` escaped by
/// a leading backslash.
fn format_attribute_value(s: &str) -> String {
    if is_integer_literal(s) || is_attribute_ident(s) {
        return s.to_owned();
    }
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for c in s.chars() {
        if matches!(c, '\\' | '"') {
            quoted.push('\\');
        }
        quoted.push(c);
    }
    quoted.push('"');
    quoted
}

/// True for an optional single leading `-` followed by one or more
/// ASCII digits.
fn is_integer_literal(s: &str) -> bool {
    let digits = match s.strip_prefix('-') {
        Some(rest) => rest,
        None => s,
    };
    !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit())
}

/// True when `s` matches `[a-zA-Z_][a-zA-Z0-9_]*`.
fn is_attribute_ident(s: &str) -> bool {
    match s.chars().next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => s
            .chars()
            .skip(1)
            .all(|c| c.is_ascii_alphanumeric() || c == '_'),
        _ => false,
    }
}
+ ..Default::default() + }, + ], + }), + choices: Some(Choices { + entries: vec![Choice { + name: "mode_choices".into(), + enumerations: vec![ + Enumeration { + value: "FAST".into(), + ..Default::default() + }, + Enumeration { + value: "SLOW".into(), + ..Default::default() + }, + ], + }], + }), + ..Default::default() + } + } + + fn mk_split_component(n_per_group: usize) -> Component { + // Build a component with two big groups and a smattering of + // small ones, all above the split threshold. + let mut entries = Vec::new(); + for i in 0..n_per_group { + entries.push(Parameter { + name: format!("BIG_ONE_FIELD{i}"), + value: ParamValue { + text: "0".into(), + resolve: Some("user".into()), + ..Default::default() + }, + ..Default::default() + }); + entries.push(Parameter { + name: format!("BIG_TWO_FIELD{i}"), + value: ParamValue { + text: "1".into(), + resolve: Some("user".into()), + ..Default::default() + }, + ..Default::default() + }); + } + // A pair of tiny groups that should be collapsed into _misc. 
+ for name in ["TINY_A_ONE", "TINY_B_ONE", "TINY_C_ONE", "STRAY_THING"] { + entries.push(Parameter { + name: name.into(), + value: ParamValue { + text: "x".into(), + resolve: Some("user".into()), + ..Default::default() + }, + ..Default::default() + }); + } + Component { + vendor: "acme".into(), + library: "ip".into(), + name: "wide".into(), + version: "1.0".into(), + parameters: Some(Parameters { entries }), + ..Default::default() + } + } + + #[test] + fn single_mode_below_threshold() { + let out = generate( + &mk_component(), + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + let n_procs = + out.matches("\nproc ").count() + out.starts_with("proc ") as usize; + assert_eq!(n_procs, 1, "{out}"); + assert!(out.contains("proc create_demo")); + } + + #[test] + fn split_mode_emits_top_and_sub_procs() { + let component = mk_split_component(60); // 60 * 2 + 4 = 124 params > 100 + let out = generate( + &component, + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + eprintln!("--- generated ---\n{out}\n--- end ---"); + assert!(out.contains("proc create_wide ")); + assert!(out.contains("proc create_wide_big_one ")); + assert!(out.contains("proc create_wide_big_two ")); + let parsed = vw_htcl::parse(&out); + assert!(parsed.errors.is_empty(), "{:?}", parsed.errors); + let diags = vw_htcl::validate(&parsed.document, &out); + let errors: Vec<_> = diags + .iter() + .filter(|d| d.severity == vw_htcl::Severity::Error) + // The generator emits calls into vivado-cmd + // (`ip::check`, `create_bd_cell`, `set_property`); + // those resolve when the wrapper is sourced through + // the loader, but this unit test runs the validator + // on the bare generated text. The unknown-call + // diagnostic is *expected* in that mode; we filter it + // out so the test catches real structural breakage. 
+ .filter(|d| !d.message.starts_with("undefined proc")) + .collect(); + assert!(errors.is_empty(), "{errors:#?}"); + } + + #[test] + fn split_sub_procs_take_cell_as_first_arg() { + let component = mk_split_component(60); + let out = generate( + &component, + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + // Sub-proc args block starts with the `cell` arg. + assert!(out.contains( + "proc create_wide_big_one {\n ## Block-design cell handle" + )); + assert!(out.contains("cell\n")); + } + + #[test] + fn tiny_groups_land_on_the_parent_proc() { + let component = mk_split_component(60); + let out = generate( + &component, + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + // None of the tiny prefix groups becomes its own proc... + for name in [ + "create_wide_tiny_a ", + "create_wide_tiny_b ", + "create_wide_stray ", + ] { + assert!(!out.contains(name), "unexpected {name} in:\n{out}"); + } + // ...and the params instead appear as args on the top proc. + let top_block = out + .split_once("proc create_wide_big_one") + .map(|(top, _)| top.to_string()) + .unwrap_or_else(|| out.clone()); + for arg in ["tiny_a_one", "tiny_b_one", "tiny_c_one", "stray_thing"] { + assert!( + top_block.contains(arg), + "{arg} missing from top: {top_block}" + ); + } + } + + #[test] + fn arg_name_strips_node_prefix() { + // Two big groups whose internal arg names should be the + // segments *after* the group prefix, not the full name. 
+ let entries = (0..10) + .flat_map(|i| { + [ + Parameter { + name: format!("GROUP_A_FIELD{i}"), + value: ParamValue { + text: "0".into(), + resolve: Some("user".into()), + ..Default::default() + }, + ..Default::default() + }, + Parameter { + name: format!("GROUP_B_FIELD{i}"), + value: ParamValue { + text: "0".into(), + resolve: Some("user".into()), + ..Default::default() + }, + ..Default::default() + }, + ] + }) + .collect(); + let component = Component { + vendor: "acme".into(), + library: "ip".into(), + name: "demo".into(), + version: "1.0".into(), + parameters: Some(Parameters { entries }), + ..Default::default() + }; + let opts = GenerateOptions { + split_threshold: 5, + ..GenerateOptions::default() + }; + let out = generate( + &component, + &Default::default(), + &::std::collections::HashMap::new(), + &opts, + ); + // Inside the GROUP_A proc, the arg names should be `field0`, + // not `group_a_field0`. + assert!(out.contains("@default(0) field0\n"), "{out}"); + assert!(!out.contains("group_a_field0"), "{out}"); + // The CONFIG. mapping keeps the full IP-XACT name. 
+ assert!(out.contains("CONFIG.GROUP_A_FIELD0 $field0"), "{out}"); + } + + #[test] + fn generated_output_parses_back() { + let out = generate( + &mk_component(), + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + let parsed = vw_htcl::parse(&out); + assert!( + parsed.errors.is_empty(), + "parse errors: {:?}", + parsed.errors + ); + } + + #[test] + fn includes_description_as_doc_comment() { + let out = generate( + &mk_component(), + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + assert!(out.contains("## A demo IP."), "{out}"); + assert!(out.contains("## Bus width in bits."), "{out}"); + } + + #[test] + fn emits_default_and_enum_attributes() { + let out = generate( + &mk_component(), + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + assert!(out.contains("@default(32) bus_width"), "{out}"); + assert!(out.contains("@enum(FAST, SLOW)"), "{out}"); + assert!(out.contains("@default(FAST) mode"), "{out}"); + } + + #[test] + fn emits_set_property_for_each_param() { + let out = generate( + &mk_component(), + &Default::default(), + &::std::collections::HashMap::new(), + &GenerateOptions::default(), + ); + assert!(out.contains("CONFIG.BUS_WIDTH $bus_width"), "{out}"); + assert!(out.contains("CONFIG.MODE $mode"), "{out}"); + } +} diff --git a/vw-ip/src/group.rs b/vw-ip/src/group.rs new file mode 100644 index 0000000..eaf3952 --- /dev/null +++ b/vw-ip/src/group.rs @@ -0,0 +1,124 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Derive parameter groups from naming conventions. +//! +//! IP-XACT components published by Xilinx carry no machine-readable +//! grouping for their configuration parameters. The xgui Tcl scripts +//! 
that drive the GUI grouping are encrypted, so they're not a source +//! we can use. What we *can* use is the strong prefix structure of the +//! parameter names themselves: in CPM5, 4200 parameters start with +//! `CPM_PCIE0_`, another 4200 with `CPM_PCIE1_`, 136 with `CPM_CCIX_`, +//! and so on. That structure is the right grain for a sub-proc. +//! +//! The grouping strategy: +//! +//! 1. Split each parameter name on `_`. +//! 2. Take the first N segments as the group key. We pick N to balance +//! group cardinality vs. group size — small enough that there are +//! few groups (so each becomes a manageable proc), big enough that +//! no single group is so huge it's just a flat dump. +//! 3. Parameters with no underscore, or whose only group would be a +//! singleton, fall into a catch-all `_misc` group. + +use std::collections::BTreeMap; + +use ipxact::Parameter; + +#[derive(Clone, Debug)] +pub struct ParameterGroup<'a> { + /// Key used as the group name (e.g. `CPM_PCIE0`). + pub key: String, + /// Parameters in this group, in input order. + pub parameters: Vec<&'a Parameter>, +} + +/// Group parameters by their leading underscore-separated segments. +/// `prefix_segments` controls how many leading segments form the key: +/// 1 = `CPM`, 2 = `CPM_PCIE0`, etc. 2 is the right default for Xilinx's +/// big IPs; their first segment is a coarse domain (`CPM`, `PS`, `PMC`) +/// and the second names the controller / subsystem. +pub fn group_parameters<'a, I>( + parameters: I, + prefix_segments: usize, +) -> Vec> +where + I: IntoIterator, +{ + // BTreeMap keeps groups in a stable, readable order. + let mut groups: BTreeMap> = BTreeMap::new(); + for p in parameters { + let key = prefix_key(&p.name, prefix_segments); + groups.entry(key).or_default().push(p); + } + groups + .into_iter() + .map(|(key, parameters)| ParameterGroup { key, parameters }) + .collect() +} + +/// First `n` underscore-separated segments of `name`. 
If `name` has +/// fewer than `n` segments (or no underscores), returns the whole name. +/// Empty names map to the literal `_misc`. +fn prefix_key(name: &str, n: usize) -> String { + if name.is_empty() { + return "_misc".into(); + } + let mut out = String::new(); + for (i, seg) in name.split('_').enumerate().take(n) { + if i > 0 { + out.push('_'); + } + out.push_str(seg); + } + // If the name has fewer than `n` segments, we end up with the full + // name as the key — that's fine; it just means the group is named + // after the parameter itself. Singletons coalesce later if we want. + out +} + +#[cfg(test)] +mod tests { + use super::*; + + fn p(name: &str) -> Parameter { + Parameter { + name: name.into(), + ..Default::default() + } + } + + #[test] + fn groups_by_two_prefix_segments() { + let params = [ + p("CPM_PCIE0_FOO"), + p("CPM_PCIE0_BAR"), + p("CPM_PCIE1_BAZ"), + p("CPM_CCIX_QUX"), + ]; + let groups = group_parameters(¶ms, 2); + let by_key: Vec<_> = groups + .iter() + .map(|g| (g.key.clone(), g.parameters.len())) + .collect(); + assert_eq!( + by_key, + vec![ + ("CPM_CCIX".to_string(), 1), + ("CPM_PCIE0".to_string(), 2), + ("CPM_PCIE1".to_string(), 1), + ] + ); + } + + #[test] + fn names_with_fewer_segments_become_their_own_key() { + let params = [p("FOO"), p("FOO_BAR_BAZ")]; + let groups = group_parameters(¶ms, 2); + let keys: Vec<_> = groups.iter().map(|g| g.key.as_str()).collect(); + // "FOO" stays "FOO" (only one segment), "FOO_BAR_BAZ" becomes "FOO_BAR". + assert!(keys.contains(&"FOO")); + assert!(keys.contains(&"FOO_BAR")); + } +} diff --git a/vw-ip/src/lib.rs b/vw-ip/src/lib.rs new file mode 100644 index 0000000..d941094 --- /dev/null +++ b/vw-ip/src/lib.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! IP-XACT → htcl wrapper generation. +//! +//! 
Reads an IP-XACT component description (via the `ipxact` crate) and +//! emits an htcl instantiation proc for it — the "configuration +//! interface" layer described in the project plan: one top-level proc +//! per IP, with sub-procs for parameter groups when an IP's surface is +//! too large for a single proc to be tractable (CPM5 has ~8700 +//! parameters). +//! +//! Group recovery: IP-XACT itself carries no grouping metadata for +//! large Xilinx IPs (no `` etc., and the `xgui/*.tcl` +//! files that *do* carry the UI grouping are encrypted). Instead, we +//! derive groups from the convention Xilinx uses in parameter naming — +//! `CPM_PCIE0_*`, `CPM_PCIE1_*`, `PS_PMC_*` and so on are clear +//! prefix clusters. See [`group_parameters`]. + +pub mod cips_dict; +pub mod generate; +pub mod group; +pub mod presets; +pub mod summary; +pub mod tree; + +pub use cips_dict::{ + load_schemas as load_cips_dict_schemas, DictField, DictSchema, +}; +pub use generate::{generate, GenerateOptions}; +pub use group::{group_parameters, ParameterGroup}; +pub use presets::{ + discover_for as discover_presets, load_files as load_presets, PresetMap, +}; +pub use summary::Summary; +pub use tree::{build_tree, Node, TreeOptions}; + +use std::path::Path; + +use ipxact::Component; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("loading IP-XACT component: {0}")] + Ipxact(#[from] ipxact::Error), +} + +pub type Result = std::result::Result; + +/// Load an IP-XACT component from disk. +pub fn load(path: impl AsRef) -> Result { + Ok(Component::from_file(path)?) +} diff --git a/vw-ip/src/presets.rs b/vw-ip/src/presets.rs new file mode 100644 index 0000000..710abdb --- /dev/null +++ b/vw-ip/src/presets.rs @@ -0,0 +1,268 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! 
Out-of-band parameter value sources for IP-XACT components. +//! +//! Some Xilinx IPs (notably CIPS / CPM5) ship the bulk of their +//! parameter enumerations *outside* the IP-XACT XML, in +//! `cpm_preset*.xml` files Vivado bundles under `data/versal/ps_pmc/`. +//! Without them, parameters like `CPM_PCIE1_PF0_BASE_CLASS_MENU` would +//! only carry their declared default in the generated `@enum(...)`, +//! and there's no other principled signal to recover the legal values +//! from. This module reads those files into a flat map the generator +//! can merge against the IP-XACT `` lists. +//! +//! The XML shape is uniform across the files I've seen: +//! +//! ```xml +//! +//! +//! +//! ... +//! +//! ``` + +use std::collections::{BTreeSet, HashMap}; +use std::fs; +use std::path::{Path, PathBuf}; + +use serde::Deserialize; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("reading preset file {path}: {source}")] + Io { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("parsing preset file {path}: {source}")] + Xml { + path: PathBuf, + #[source] + source: quick_xml::DeError, + }, +} + +/// `param_name → set of valid values`. `BTreeSet` keeps the iteration +/// order stable so generated `@enum(...)` lists are deterministic. +pub type PresetMap = HashMap>; + +#[derive(Debug, Default, Deserialize)] +struct Root { + #[serde(default, rename = "preset")] + entries: Vec, +} + +#[derive(Debug, Deserialize)] +struct Entry { + #[serde(rename = "@param")] + param: String, + #[serde(rename = "@name")] + name: String, +} + +/// Load one preset XML file into a fresh map. 
+pub fn load_file(path: &Path) -> Result { + let xml = fs::read_to_string(path).map_err(|source| Error::Io { + path: path.to_path_buf(), + source, + })?; + let root: Root = + quick_xml::de::from_str(&xml).map_err(|source| Error::Xml { + path: path.to_path_buf(), + source, + })?; + let mut map = PresetMap::new(); + for e in root.entries { + map.entry(e.param).or_default().insert(e.name); + } + Ok(map) +} + +/// Load several preset XML files and merge their entries into one map. +pub fn load_files(paths: I) -> Result +where + I: IntoIterator, + I::Item: AsRef, +{ + let mut merged = PresetMap::new(); + for p in paths { + let map = load_file(p.as_ref())?; + for (param, values) in map { + merged.entry(param).or_default().extend(values); + } + } + Ok(merged) +} + +/// Try to find sibling preset files for the IP whose +/// `component.xml` lives at `component_path`. +/// +/// Walks up from the component file looking for a Vivado-style +/// `data/` ancestor directory and then peeks at +/// `data/versal/ps_pmc//`. Any `*preset*.xml` found there +/// (recursively) is returned. Returns an empty vector — not an error — +/// when the layout doesn't match; the caller should treat it as a +/// best-effort hint. +pub fn discover_for(component_path: &Path) -> Vec { + let Some(data_root) = data_root_of(component_path) else { + return Vec::new(); + }; + let ip_name = ip_name_from(component_path); + let Some(ip_name) = ip_name else { + return Vec::new(); + }; + let ip_dir = data_root.join("versal").join("ps_pmc").join(&ip_name); + if !ip_dir.is_dir() { + return Vec::new(); + } + let mut out = Vec::new(); + collect_preset_files(&ip_dir, &mut out); + out.sort(); + out +} + +/// Recurse through `dir` collecting any `*preset*.xml` file paths. 
/// Recursively append to `out` every file under `dir` whose name
/// contains `preset` and ends in `.xml`.
///
/// Directories that cannot be read, entries that fail to enumerate and
/// file names that are not valid UTF-8 are skipped silently — discovery
/// is a best-effort hint.
fn collect_preset_files(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        let path = entry.path();
        if path.is_dir() {
            collect_preset_files(&path, out);
            continue;
        }
        let is_preset_xml = path
            .file_name()
            .and_then(|s| s.to_str())
            .is_some_and(|name| {
                name.contains("preset") && name.ends_with(".xml")
            });
        if is_preset_xml {
            out.push(path);
        }
    }
}

/// Nearest ancestor of `component_path` (the path itself included)
/// whose final component is literally `data`, if any.
fn data_root_of(component_path: &Path) -> Option<PathBuf> {
    component_path
        .ancestors()
        .find(|dir| dir.file_name().and_then(|s| s.to_str()) == Some("data"))
        .map(Path::to_path_buf)
}

/// Recover an IP's short name from the versioned directory holding its
/// component file (`cpm5_v1_0` → `cpm5`, `axi_dma_v7_1` → `axi_dma`).
///
/// Returns `None` when the path has no parent directory name, when that
/// name is not valid UTF-8, or when nothing is left after removing the
/// version suffix (an empty name would make the caller search the whole
/// preset root instead of one IP's directory).
fn ip_name_from(component_path: &Path) -> Option<String> {
    let dir_name = component_path.parent()?.file_name()?.to_str()?;
    let short = strip_version_suffix(dir_name);
    (!short.is_empty()).then(|| short.to_owned())
}

/// Remove a trailing `_v<major>_<minor>` from `name`.
///
/// Both `<major>` and `<minor>` must be non-empty runs of ASCII digits;
/// any other shape (`foo_bar`, `foo_v1`, `foo_v_0`, `foo_v1_`) is
/// returned unchanged.
fn strip_version_suffix(name: &str) -> &str {
    /// Drop a non-empty run of trailing ASCII digits, or `None` when
    /// `s` does not end in a digit.
    fn strip_digits(s: &str) -> Option<&str> {
        let rest = s.trim_end_matches(|c: char| c.is_ascii_digit());
        (rest.len() < s.len()).then_some(rest)
    }

    // Peel the suffix right to left: minor digits, `_`, major digits, `_v`.
    strip_digits(name)
        .and_then(|s| s.strip_suffix('_'))
        .and_then(strip_digits)
        .and_then(|s| s.strip_suffix("_v"))
        .unwrap_or(name)
}
+ fs::write(preset_dir.join("README.md"), "ignored").unwrap(); + + let found = discover_for(&component); + assert_eq!(found, vec![preset]); + } + + #[test] + fn discovery_empty_when_layout_doesnt_match() { + let dir = tempfile::tempdir().unwrap(); + let component = dir.path().join("loose.xml"); + fs::write(&component, "").unwrap(); + assert!(discover_for(&component).is_empty()); + } +} diff --git a/vw-ip/src/summary.rs b/vw-ip/src/summary.rs new file mode 100644 index 0000000..665eabd --- /dev/null +++ b/vw-ip/src/summary.rs @@ -0,0 +1,38 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! A small, human-friendly summary of an IP-XACT component — used by +//! `vw ip generate` to print what it's processing before emitting code. + +use ipxact::Component; + +#[derive(Clone, Debug)] +pub struct Summary { + pub vlnv: String, + pub description: Option, + pub parameter_count: usize, + pub user_parameter_count: usize, + pub model_parameter_count: usize, + pub port_count: usize, + pub choice_count: usize, +} + +impl Summary { + pub fn of(c: &Component) -> Self { + let parameters: Vec<_> = c.component_parameters().collect(); + let user_parameter_count = parameters + .iter() + .filter(|p| p.value.is_user_configurable()) + .count(); + Self { + vlnv: c.vlnv(), + description: c.description.clone(), + parameter_count: parameters.len(), + user_parameter_count, + model_parameter_count: c.model_parameters().count(), + port_count: c.ports().count(), + choice_count: c.choices().count(), + } + } +} diff --git a/vw-ip/src/tree.rs b/vw-ip/src/tree.rs new file mode 100644 index 0000000..e8a7913 --- /dev/null +++ b/vw-ip/src/tree.rs @@ -0,0 +1,258 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Recursive prefix tree over parameter names. +//! +//! Big Xilinx IPs encode their configuration hierarchy in parameter +//! names, not in IP-XACT structure: `CPM_PCIE1_PF0_BAR0_64BIT` lives +//! under PCIE1 → PF0 → BAR0, but that's all conveyed by underscores. +//! A flat depth-1 grouping leaves PCIE1 with ~4200 args, which is +//! useless in an LSP. We recurse: at each depth, partition by the next +//! segment; subgroups bigger than `min_split_size` become children +//! that recurse again; everything smaller absorbs into the current +//! node as direct parameters. Generation walks the tree and emits one +//! proc per node, which keeps every proc small enough to navigate by +//! flag completion. + +use std::collections::BTreeMap; + +use ipxact::Parameter; + +#[derive(Clone, Debug)] +pub struct TreeOptions { + /// Don't split a subgroup into its own child node unless it has at + /// least this many parameters. Smaller subgroups stay as direct + /// args of the parent — keeps singleton segments from becoming + /// their own procs. + pub min_split_size: usize, +} + +impl Default for TreeOptions { + fn default() -> Self { + Self { min_split_size: 8 } + } +} + +#[derive(Clone, Debug)] +pub struct Node<'a> { + /// Full underscore-joined prefix that names this node + /// (e.g. `CPM_PCIE1_PF0`). Empty for the root. + pub label: String, + /// Underscore-separated depth: 0 at root, 1 for `CPM`, 2 for + /// `CPM_PCIE1`, 3 for `CPM_PCIE1_PF0`, ... + pub depth: usize, + /// Parameters whose proc-level args belong on *this* node. Their + /// arg names are derived by stripping the node's prefix. + pub direct: Vec<&'a Parameter>, + /// Child nodes, keyed by their additional segment. + pub children: Vec>, +} + +impl<'a> Node<'a> { + /// Total parameters reachable from this node, including children. 
+ pub fn total_params(&self) -> usize { + self.direct.len() + + self.children.iter().map(Node::total_params).sum::() + } + + /// Number of nodes in this subtree, including self. + pub fn node_count(&self) -> usize { + 1 + self.children.iter().map(Node::node_count).sum::() + } + + /// Pre-order walk: visit self, then each child recursively. + pub fn walk(&self, f: &mut impl FnMut(&Node<'a>)) { + f(self); + for c in &self.children { + c.walk(f); + } + } + + /// Collect references to every node in this subtree in pre-order. + /// Used by code-gen, which needs to iterate the tree twice (once + /// for the header summary, once to emit procs) without re-walking + /// through a closure that can't escape `&Node` references. + pub fn collect<'t>(&'t self) -> Vec<&'t Node<'a>> { + let mut out = Vec::new(); + self.collect_into(&mut out); + out + } + + fn collect_into<'t>(&'t self, out: &mut Vec<&'t Node<'a>>) { + out.push(self); + for c in &self.children { + c.collect_into(out); + } + } +} + +/// Build the prefix tree from a flat parameter list. +pub fn build_tree<'a, I>(params: I, opts: &TreeOptions) -> Node<'a> +where + I: IntoIterator, +{ + build_node(0, String::new(), params.into_iter().collect(), opts) +} + +fn build_node<'a>( + depth: usize, + label: String, + params: Vec<&'a Parameter>, + opts: &TreeOptions, +) -> Node<'a> { + let mut direct: Vec<&'a Parameter> = Vec::new(); + let mut subgroups: BTreeMap> = BTreeMap::new(); + + for p in params { + let segs: Vec<&str> = p.name.split('_').collect(); + if depth + 1 >= segs.len() { + // No further segments to split on — this parameter belongs + // to the current node directly. + direct.push(p); + } else { + // Group by the segment at position `depth` — the next one + // not yet absorbed into the label. 
+ subgroups + .entry(segs[depth].to_string()) + .or_default() + .push(p); + } + } + + let mut children = Vec::new(); + for (seg, group) in subgroups { + // A subgroup that's smaller than the split threshold isn't + // worth its own proc — keep its parameters at this level. + if group.len() < opts.min_split_size { + direct.extend(group); + continue; + } + let child_label = if label.is_empty() { + seg.clone() + } else { + format!("{label}_{seg}") + }; + children.push(build_node(depth + 1, child_label, group, opts)); + } + + Node { + label, + depth, + direct, + children, + } +} + +/// Return the portion of `param_name` after the node's `label_prefix` +/// (and the underscore separating them). Used so arg names inside a +/// node's proc don't redundantly repeat the prefix. +pub fn strip_prefix<'a>(param_name: &'a str, label_prefix: &str) -> &'a str { + if label_prefix.is_empty() { + return param_name; + } + if let Some(rest) = param_name.strip_prefix(label_prefix) { + rest.strip_prefix('_').unwrap_or(rest) + } else { + param_name + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn p(name: &str) -> Parameter { + Parameter { + name: name.into(), + ..Default::default() + } + } + + #[test] + fn empty_input_returns_empty_root() { + let tree = + build_tree(Vec::<&Parameter>::new(), &TreeOptions::default()); + assert_eq!(tree.label, ""); + assert_eq!(tree.direct.len(), 0); + assert_eq!(tree.children.len(), 0); + } + + #[test] + fn singletons_stay_at_root() { + let params = [p("A"), p("B"), p("C")]; + let opts = TreeOptions::default(); + let tree = build_tree(params.iter(), &opts); + // Each is one segment, no subgroups; all direct at root. 
+ assert_eq!(tree.direct.len(), 3); + assert!(tree.children.is_empty()); + } + + #[test] + fn splits_when_subgroup_exceeds_threshold() { + let mut params: Vec = + (0..10).map(|i| p(&format!("CPM_PCIE1_FIELD{i}"))).collect(); + params.extend((0..10).map(|i| p(&format!("CPM_PCIE0_FIELD{i}")))); + let opts = TreeOptions { min_split_size: 5 }; + let tree = build_tree(params.iter(), &opts); + // Root has one child `CPM`; under `CPM`, children `PCIE0` and + // `PCIE1`, each with 10 direct params. + assert_eq!(tree.children.len(), 1); + let cpm = &tree.children[0]; + assert_eq!(cpm.label, "CPM"); + assert_eq!(cpm.children.len(), 2); + for c in &cpm.children { + assert_eq!(c.direct.len(), 10); + } + } + + #[test] + fn nested_hierarchy_splits_recursively() { + // Mimic PCIE1's structure: a bunch of PF0/PF1/PF2 sub-trees, + // each with BARs and CAPs. + let mut params: Vec = Vec::new(); + for pf in 0..3 { + for bar in 0..6 { + for f in 0..10 { + params.push(p(&format!( + "CPM_PCIE1_PF{pf}_BAR{bar}_FIELD{f}" + ))); + } + } + for cap in 0..3 { + for f in 0..5 { + params.push(p(&format!( + "CPM_PCIE1_PF{pf}_CAP{cap}_FIELD{f}" + ))); + } + } + } + let opts = TreeOptions { min_split_size: 5 }; + let tree = build_tree(params.iter(), &opts); + // Drill into the tree: root → CPM → PCIE1 → PF0/PF1/PF2. + let cpm = &tree.children[0]; + let pcie1 = &cpm.children[0]; + assert_eq!(pcie1.label, "CPM_PCIE1"); + assert_eq!(pcie1.children.len(), 3); // PF0, PF1, PF2 + let pf0 = &pcie1.children[0]; + // PF0 should have BAR0..BAR5 + CAP0..CAP2 as children. + let bar_count = pf0 + .children + .iter() + .filter(|c| c.label.contains("BAR")) + .count(); + assert_eq!(bar_count, 6, "{pf0:#?}"); + } + + #[test] + fn strip_prefix_returns_local_name() { + assert_eq!( + strip_prefix("CPM_PCIE1_PF0_BAR0_ENABLED", "CPM_PCIE1_PF0_BAR0"), + "ENABLED" + ); + // No prefix: returns the name unchanged. + assert_eq!(strip_prefix("FOO", ""), "FOO"); + // Prefix doesn't match: returns unchanged (defensive). 
+ assert_eq!(strip_prefix("FOO_BAR", "BAZ"), "FOO_BAR"); + } +} diff --git a/vw-ip/tests/load_real_files.rs b/vw-ip/tests/load_real_files.rs new file mode 100644 index 0000000..337ebad --- /dev/null +++ b/vw-ip/tests/load_real_files.rs @@ -0,0 +1,207 @@ +// Smoke tests that load the actual Xilinx IP-XACT files from the local +// Vivado install. Skipped automatically when the files aren't present +// so this still passes in CI without a Vivado install. + +use std::path::Path; + +use vw_ip::{generate, group_parameters, load, GenerateOptions, Summary}; + +const CIPS: &str = + "/home/ry/Xilinx/2025.1/data/rsb/iprepos/versal_cips_v3_4/component.xml"; +const CPM5: &str = + "/home/ry/Xilinx/2025.1/data/ip/xilinx/cpm5_v1_0/component.xml"; + +fn skip_if_missing(p: &str) -> bool { + if Path::new(p).exists() { + false + } else { + eprintln!("skipping: {p} not present"); + true + } +} + +#[test] +fn loads_cips_component() { + if skip_if_missing(CIPS) { + return; + } + let component = load(CIPS).expect("load CIPS"); + let summary = Summary::of(&component); + eprintln!("CIPS summary: {summary:#?}"); + assert!(summary.vlnv.contains("versal_cips")); +} + +#[test] +fn loads_cpm5_component() { + if skip_if_missing(CPM5) { + return; + } + let component = load(CPM5).expect("load CPM5"); + let summary = Summary::of(&component); + eprintln!("CPM5 summary: {summary:#?}"); + assert!(summary.vlnv.contains("cpm5")); + // CPM5 should be huge. 
+ assert!( + summary.parameter_count > 1000, + "expected many parameters, got {}", + summary.parameter_count + ); +} + +#[test] +fn generates_cips_wrapper_that_reparses() { + if skip_if_missing(CIPS) { + return; + } + let component = load(CIPS).expect("load CIPS"); + let out = generate( + &component, + &Default::default(), + &Default::default(), + &GenerateOptions::default(), + ); + eprintln!("--- generated CIPS wrapper (first 60 lines) ---"); + for line in out.lines().take(60) { + eprintln!("{line}"); + } + eprintln!("--- ({} lines total) ---", out.lines().count()); + + let parsed = vw_htcl::parse(&out); + assert!( + parsed.errors.is_empty(), + "parse errors: {:?}", + parsed.errors + ); + + // Validate the generated wrapper against its own signature using + // the same validator the LSP runs. + let diags = vw_htcl::validate(&parsed.document, &out); + let errors: Vec<_> = diags + .iter() + .filter(|d| d.severity == vw_htcl::Severity::Error) + // The generator emits calls into vivado-cmd (`ip::check`, + // `create_bd_cell`, `set_property`); those resolve when + // the wrapper is sourced through the loader, but this + // integration test validates the bare generated text. + // The unknown-call diagnostic is expected in that mode. + .filter(|d| !d.message.starts_with("undefined proc")) + .collect(); + assert!(errors.is_empty(), "validator errors: {errors:#?}"); +} + +#[test] +fn generates_cpm5_wrapper_in_split_mode() { + if skip_if_missing(CPM5) { + return; + } + let component = load(CPM5).expect("load CPM5"); + let out = generate( + &component, + &Default::default(), + &Default::default(), + &GenerateOptions::default(), + ); + + // Walk the generated source and measure per-proc arg counts so we + // can assert nothing is anywhere near the 4200-arg PCIE1 disaster + // we started with. 
+ let mut proc_sizes: Vec<(String, usize)> = Vec::new(); + let mut current: Option<(String, usize)> = None; + let mut in_args = false; + for line in out.lines() { + if let Some(name) = line + .strip_prefix("proc ") + .and_then(|s| s.split_once(' ').map(|(n, _)| n)) + { + current = Some((name.to_string(), 0)); + in_args = true; + } else if line == "} {" + || (line.starts_with("} ") && line.ends_with(" {")) + { + // `} {` is the old (untyped) body opener; `} TYPE {` + // (e.g. `} bd_cell {`, `} unit {`) is the new + // type-annotated form added in step 6 of the + // return-type rollout. Either ends the args block. + if let Some(c) = current.take() { + proc_sizes.push(c); + } + in_args = false; + } else if in_args + && line.starts_with(" ") + && !line.trim_start().starts_with("##") + && !line.trim().is_empty() + { + if let Some(c) = current.as_mut() { + c.1 += 1; + } + } + } + proc_sizes.sort_by_key(|(_, n)| std::cmp::Reverse(*n)); + let (max_name, max_size) = proc_sizes[0].clone(); + let total_procs = proc_sizes.len(); + eprintln!( + "CPM5 wrapper: {} procs, {} lines, biggest is {} ({} args)", + total_procs, + out.lines().count(), + max_name, + max_size + ); + for (n, s) in proc_sizes.iter().take(8) { + eprintln!(" {n:>40} = {s} args"); + } + + // Hierarchical split should leave every proc small enough to + // navigate in an LSP — no more 4200-arg procs. + assert!( + max_size <= 250, + "biggest proc {max_name} still has {max_size} args; \ + hierarchy isn't splitting deep enough" + ); + // And the overall proc count should reflect that we *are* splitting. 
+ assert!( + total_procs > 50, + "only {total_procs} procs — hierarchy isn't being built" + ); + + assert!(out.contains("proc create_cpm5 {\n ## Instance name")); + assert!(out.contains("proc create_cpm5_cpm_pcie0 ")); + assert!(out.contains("proc create_cpm5_cpm_pcie1 ")); + + let parsed = vw_htcl::parse(&out); + assert!( + parsed.errors.is_empty(), + "parse errors: {:?}", + parsed.errors + ); + let diags = vw_htcl::validate(&parsed.document, &out); + let errors: Vec<_> = diags + .iter() + .filter(|d| d.severity == vw_htcl::Severity::Error) + // The generator emits calls into vivado-cmd (`ip::check`, + // `create_bd_cell`, `set_property`); those resolve when + // the wrapper is sourced through the loader, but this + // integration test validates the bare generated text. + // The unknown-call diagnostic is expected in that mode. + .filter(|d| !d.message.starts_with("undefined proc")) + .collect(); + assert!(errors.is_empty(), "validator errors: {errors:#?}"); +} + +#[test] +fn groups_cpm5_parameters_into_handful_of_buckets() { + if skip_if_missing(CPM5) { + return; + } + let component = load(CPM5).expect("load CPM5"); + let params: Vec<_> = component.component_parameters().collect(); + let groups = group_parameters(params.iter().copied(), 2); + eprintln!("CPM5 has {} groups at prefix=2:", groups.len()); + for g in groups.iter().take(20) { + eprintln!(" {:>32} = {} params", g.key, g.parameters.len()); + } + eprintln!(" ... ({} total)", groups.len()); + // We expect a manageable number of groups (not one giant flat list, + // not thousands of singletons). 
+ assert!(groups.len() < 200, "too many groups: {}", groups.len()); + assert!(groups.len() > 2, "too few groups: {}", groups.len()); +} diff --git a/vw-lib/src/lib.rs b/vw-lib/src/lib.rs index 7e3ab38..b4a6841 100644 --- a/vw-lib/src/lib.rs +++ b/vw-lib/src/lib.rs @@ -202,13 +202,36 @@ pub struct WorkspaceInfo { pub version: String, } -#[derive(Debug, Deserialize, Serialize)] +/// How a workspace dependency identifies its source. Currently a git +/// repo or a local filesystem path; the natural future addition is a +/// registry-resolved variant (`Registry { name, version }`) once a +/// crates.io-like index exists. +/// +/// `#[serde(untagged)]` keeps the `vw.toml` ergonomics that came +/// before: an entry with `repo = "..."` reads as `Git`, an entry with +/// `path = "..."` reads as `Path`. New variants need new +/// non-ambiguous required keys for serde to discriminate cleanly. +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum DependencySource { + Git { + repo: String, + #[serde(default)] + branch: Option, + #[serde(default)] + commit: Option, + #[serde(default)] + submodules: bool, + }, + Path { + path: PathBuf, + }, +} + +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Dependency { - pub repo: String, - #[serde(default)] - pub branch: Option, - #[serde(default)] - pub commit: Option, + #[serde(flatten)] + pub source: DependencySource, #[serde(default)] pub src: Vec, #[serde(default)] @@ -216,11 +239,51 @@ pub struct Dependency { #[serde(default)] pub sim_only: bool, #[serde(default)] - pub submodules: bool, - #[serde(default)] pub exclude: Vec, } +impl Dependency { + pub fn is_local(&self) -> bool { + matches!(self.source, DependencySource::Path { .. }) + } + + /// Git-only accessor: the upstream repo URL. + pub fn repo(&self) -> Option<&str> { + match &self.source { + DependencySource::Git { repo, .. } => Some(repo), + DependencySource::Path { .. 
} => None, + } + } + + pub fn branch(&self) -> Option<&str> { + match &self.source { + DependencySource::Git { branch, .. } => branch.as_deref(), + DependencySource::Path { .. } => None, + } + } + + pub fn commit(&self) -> Option<&str> { + match &self.source { + DependencySource::Git { commit, .. } => commit.as_deref(), + DependencySource::Path { .. } => None, + } + } + + pub fn submodules(&self) -> bool { + match &self.source { + DependencySource::Git { submodules, .. } => *submodules, + DependencySource::Path { .. } => false, + } + } + + pub fn local_path(&self) -> Option<&Path> { + match &self.source { + DependencySource::Path { path } => Some(path.as_path()), + DependencySource::Git { .. } => None, + } + } +} + #[derive(Debug, Serialize, Deserialize)] pub struct LockFile { pub dependencies: HashMap, @@ -503,75 +566,102 @@ pub async fn update_workspace_with_token( let mut update_info = Vec::new(); for (name, dep) in &config.dependencies { - // Use credentials passed from caller let creds = credentials .as_ref() .map(|c| (c.username.as_str(), c.password.as_str())); - let commit_sha = resolve_dependency_commit( - &dep.repo, - &dep.branch, - &dep.commit, - creds, - ) - .await - .map_err(|e| VwError::Dependency { - message: format!( - "Failed to resolve commit for dependency '{name}': {e}" - ), - })?; - - let dep_path = deps_dir.join(format!("{name}-{commit_sha}")); - - let was_cached = dep_path.exists(); - - if !was_cached { - download_dependency( - &dep.repo, - &commit_sha, - &dep.src, - &dep_path, - dep.recursive, - &dep.exclude, - dep.submodules, - creds, - ) - .await - .map_err(|e| VwError::Dependency { - message: format!("Failed to download dependency '{name}': {e}"), - })?; - } - - update_info.push(DependencyUpdateInfo { - name: name.clone(), - commit: commit_sha.clone(), - was_cached, - }); - - lock_file.dependencies.insert( - name.clone(), - LockedDependency { - repo: dep.repo.clone(), - commit: commit_sha.clone(), - src: dep.src.clone(), - path: 
PathBuf::from(format!("{name}-{commit_sha}")), - recursive: dep.recursive, - sim_only: dep.sim_only, - submodules: dep.submodules, - exclude: dep.exclude.clone(), - }, - ); + // Decide the on-disk location for this dep: cache directory + // for git deps (downloaded if missing), or the declared + // filesystem path for local deps. Local deps don't get a lock + // entry — there's no commit to pin. + let dep_path = match &dep.source { + DependencySource::Git { + repo, + branch, + commit, + submodules, + } => { + let commit_sha = + resolve_dependency_commit(repo, branch, commit, creds) + .await + .map_err(|e| VwError::Dependency { + message: format!( + "Failed to resolve commit for dependency '{name}': {e}" + ), + })?; + let dep_path = deps_dir.join(format!("{name}-{commit_sha}")); + let was_cached = dep_path.exists(); + if !was_cached { + download_dependency( + repo, + &commit_sha, + &dep.src, + &dep_path, + dep.recursive, + &dep.exclude, + *submodules, + creds, + ) + .await + .map_err(|e| VwError::Dependency { + message: format!( + "Failed to download dependency '{name}': {e}" + ), + })?; + } + update_info.push(DependencyUpdateInfo { + name: name.clone(), + commit: commit_sha.clone(), + was_cached, + }); + lock_file.dependencies.insert( + name.clone(), + LockedDependency { + repo: repo.clone(), + commit: commit_sha.clone(), + src: dep.src.clone(), + path: PathBuf::from(format!("{name}-{commit_sha}")), + recursive: dep.recursive, + sim_only: dep.sim_only, + submodules: *submodules, + exclude: dep.exclude.clone(), + }, + ); + dep_path + } + DependencySource::Path { path } => { + if !path.exists() { + return Err(VwError::Dependency { + message: format!( + "Local dependency '{name}' path does not exist: {}", + path.display() + ), + }); + } + update_info.push(DependencyUpdateInfo { + name: name.clone(), + commit: "local".into(), + was_cached: true, + }); + path.clone() + } + }; - // Find VHDL files in the cached dependency directory + // VHDL libraries: same treatment 
regardless of source. Local + // deps' file paths are kept absolute (they aren't relative to + // the per-user cache); git deps stay portable. let vhdl_files = find_vhdl_files(&dep_path, dep.recursive, &dep.exclude)?; if !vhdl_files.is_empty() { - let portable_files = - vhdl_files.into_iter().map(make_path_portable).collect(); + let files = if dep.is_local() { + vhdl_files + } else { + vhdl_files.into_iter().map(make_path_portable).collect() + }; vhdl_ls_config.libraries.insert( name.clone(), VhdlLsLibrary { - files: portable_files, + files, exclude: None, is_third_party: None, }, @@ -660,13 +750,15 @@ pub async fn add_dependency_with_token( let src_paths = vec![src.unwrap_or_else(|| ".".to_string())]; let dependency = Dependency { - repo: repo.clone(), - branch, - commit, + source: DependencySource::Git { + repo: repo.clone(), + branch, + commit, + submodules: false, + }, src: src_paths, recursive, sim_only, - submodules: false, exclude: Vec::new(), }; @@ -735,42 +827,42 @@ pub fn list_dependencies( let mut deps = Vec::new(); for (name, dep) in &config.dependencies { - let version_info = match &lock_file { - Some(lock) => { - if let Some(locked_dep) = lock.dependencies.get(name) { - VersionInfo::Locked { - commit: locked_dep.commit.clone(), - } - } else { - // Not yet resolved, show branch/commit from config - match (&dep.branch, &dep.commit) { - (Some(branch), None) => VersionInfo::Branch { - branch: branch.clone(), - }, - (None, Some(commit)) => VersionInfo::Commit { - commit: commit.clone(), - }, - _ => VersionInfo::Unknown, - } - } + let (source_label, version_info) = match &dep.source { + DependencySource::Path { path } => { + (path.display().to_string(), VersionInfo::Local) } - None => { - // No lock file, show branch/commit from config - match (&dep.branch, &dep.commit) { - (Some(branch), None) => VersionInfo::Branch { - branch: branch.clone(), - }, - (None, Some(commit)) => VersionInfo::Commit { - commit: commit.clone(), + DependencySource::Git { + repo, + 
branch, + commit, + .. + } => { + let from_config = + || match (branch.as_deref(), commit.as_deref()) { + (Some(b), None) => { + VersionInfo::Branch { branch: b.into() } + } + (None, Some(c)) => { + VersionInfo::Commit { commit: c.into() } + } + _ => VersionInfo::Unknown, + }; + let version = match &lock_file { + Some(lock) => match lock.dependencies.get(name) { + Some(locked_dep) => VersionInfo::Locked { + commit: locked_dep.commit.clone(), + }, + None => from_config(), }, - _ => VersionInfo::Unknown, - } + None => from_config(), + }; + (repo.clone(), version) } }; deps.push(DependencyInfo { name: name.clone(), - repo: dep.repo.clone(), + source: source_label, version: version_info, }); } @@ -781,15 +873,25 @@ pub fn list_dependencies( #[derive(Debug, Clone)] pub struct DependencyInfo { pub name: String, - pub repo: String, + /// User-facing source description: the repo URL for git deps, + /// the local path for path deps. + pub source: String, pub version: VersionInfo, } #[derive(Debug, Clone)] pub enum VersionInfo { - Branch { branch: String }, - Commit { commit: String }, - Locked { commit: String }, + Branch { + branch: String, + }, + Commit { + commit: String, + }, + Locked { + commit: String, + }, + /// Local filesystem dependency — no commit to pin. + Local, Unknown, } @@ -1959,6 +2061,46 @@ pub fn deps_directory() -> Result { /// per-user `$HOME/.vw/deps` directory) so the file is identical across /// machines. Absolute paths are returned unchanged to remain compatible /// with lock files written by older versions of vw. +/// Build a `name → absolute cache path` map for every dependency in +/// the workspace's `vw.lock`. Used by htcl's `src @name/...` resolver +/// in `vw-htcl::src_path::Resolver` so the language-layer crate stays +/// free of workspace / lockfile concerns. 
+/// +/// Returns an empty map (not an error) if the workspace has no +/// `vw.lock` yet — relative and absolute `src` imports still work +/// against an empty resolver, only `@name/` lookups fail. +pub fn dep_cache_paths( + workspace_dir: &Utf8Path, +) -> Result> { + let mut out = HashMap::new(); + + // Local deps live wherever `vw.toml` says; they don't need a + // lockfile (nothing to pin). Read them straight from the workspace + // config so they work before — or without — a `vw update`. + if let Ok(config) = load_workspace_config(workspace_dir) { + for (name, dep) in config.dependencies { + if let Some(path) = dep.local_path() { + out.insert(name, path.to_path_buf()); + } + } + } + + // Git deps are resolved through the lockfile and the per-user + // cache. A missing lock isn't an error here — just skip git entries. + match load_lock_file(workspace_dir) { + Ok(lock) => { + for (name, locked) in lock.dependencies { + let abs = resolve_dep_path(&locked.path)?; + out.insert(name, abs); + } + } + Err(VwError::Config { .. }) => {} + Err(e) => return Err(e), + } + + Ok(out) +} + fn resolve_dep_path(path: &Path) -> Result { if path.is_absolute() { return Ok(path.to_path_buf()); @@ -1967,6 +2109,55 @@ fn resolve_dep_path(path: &Path) -> Result { Ok(deps_dir.join(path)) } +/// Like [`dep_cache_paths`], but walks the dependency graph +/// transitively: for every dep whose cached root is itself a +/// workspace (i.e. has its own `vw.toml`), pull in *its* deps too, +/// and so on. The result is a flat `name → root` map covering every +/// dep any file in this workspace's transitive closure might +/// `src @/...`-import. +/// +/// First-seen-wins on name conflicts so the entry workspace's +/// declarations take precedence over a dep's choice of the same +/// name (matching Cargo's resolution: the top-level `Cargo.toml` +/// pins the version for the whole graph). +/// +/// Returns an empty map (not an error) if the entry workspace has +/// no deps. 
Per-dep failures (missing `vw.toml`, malformed config) +/// are skipped: a dep may not be its own htcl workspace, and that's +/// fine — we just won't see *its* deps. +pub fn transitive_dep_cache_paths( + entry_workspace_dir: &Utf8Path, +) -> Result> { + let mut out: HashMap = HashMap::new(); + let mut visited: std::collections::HashSet = + std::collections::HashSet::new(); + let mut queue: Vec = + vec![entry_workspace_dir.as_std_path().to_path_buf()]; + + while let Some(ws) = queue.pop() { + if !visited.insert(ws.clone()) { + continue; + } + let Ok(ws_utf8) = Utf8PathBuf::from_path_buf(ws) else { + continue; + }; + let Ok(paths) = dep_cache_paths(&ws_utf8) else { + continue; + }; + for (name, dep_path) in paths { + // First-seen wins — don't let a transitive dep override + // the entry workspace's choice. + out.entry(name).or_insert_with(|| dep_path.clone()); + // If the dep is itself a workspace, recurse into it. A + // dep without a `vw.toml` is a leaf (just files). + if dep_path.join("vw.toml").exists() { + queue.push(dep_path); + } + } + } + Ok(out) +} + async fn resolve_dependency_commit( repo_url: &str, branch: &Option, @@ -2618,3 +2809,167 @@ async fn build_rust_library( Ok(lib_path.into()) } + +#[cfg(test)] +mod dependency_source_tests { + use super::*; + + /// A `vw.toml` entry with `repo = "..."` parses as a git source — + /// the historical behaviour that pre-dates path deps. 
+ #[test] + fn git_dep_parses_from_repo_key() { + let toml = r#" + [workspace] + name = "demo" + version = "0.1.0" + + [dependencies.quartz] + repo = "https://github.com/oxidecomputer/quartz" + branch = "main" + src = ["hdl/ip/vhd"] + recursive = true + "#; + let config: WorkspaceConfig = toml::from_str(toml).unwrap(); + let dep = &config.dependencies["quartz"]; + assert!(!dep.is_local()); + assert_eq!(dep.repo(), Some("https://github.com/oxidecomputer/quartz")); + assert_eq!(dep.branch(), Some("main")); + assert!(dep.recursive); + assert_eq!(dep.src, vec!["hdl/ip/vhd".to_string()]); + } + + /// The metroid layout: `path = "..."` and nothing else. + #[test] + fn path_dep_parses_from_path_key() { + let toml = r#" + [workspace] + name = "metroid" + version = "0.1.0" + + [dependencies.amd-htcl] + path = "/home/ry/src/amd-htcl" + "#; + let config: WorkspaceConfig = toml::from_str(toml).unwrap(); + let dep = &config.dependencies["amd-htcl"]; + assert!(dep.is_local()); + assert_eq!(dep.local_path(), Some(Path::new("/home/ry/src/amd-htcl"))); + assert_eq!(dep.repo(), None); + assert_eq!(dep.branch(), None); + } + + #[test] + fn transitive_dep_resolution_pulls_in_lib_of_lib() { + // metroid → cips → vivado-cmd. Asking for metroid's deps + // transitively should return cips AND vivado-cmd, even though + // metroid only declares cips. 
+ let dir = tempfile::tempdir().unwrap(); + let metroid = dir.path().join("metroid"); + let cips = dir.path().join("cips"); + let vivado_cmd = dir.path().join("vivado-cmd"); + std::fs::create_dir_all(&metroid).unwrap(); + std::fs::create_dir_all(&cips).unwrap(); + std::fs::create_dir_all(&vivado_cmd).unwrap(); + std::fs::write( + metroid.join("vw.toml"), + format!( + "[workspace]\nname=\"metroid\"\nversion=\"0.1.0\"\n\n\ + [dependencies.cips]\npath = \"{}\"\n", + cips.display() + ), + ) + .unwrap(); + std::fs::write( + cips.join("vw.toml"), + format!( + "[workspace]\nname=\"cips\"\nversion=\"0.1.0\"\n\n\ + [dependencies.vivado-cmd]\npath = \"{}\"\n", + vivado_cmd.display() + ), + ) + .unwrap(); + // vivado-cmd is a leaf — has a vw.toml but no deps of its own. + std::fs::write( + vivado_cmd.join("vw.toml"), + "[workspace]\nname=\"vivado-cmd\"\nversion=\"0.1.0\"\n", + ) + .unwrap(); + + let metroid_utf8 = Utf8PathBuf::from_path_buf(metroid.clone()).unwrap(); + let resolved = transitive_dep_cache_paths(&metroid_utf8).unwrap(); + assert_eq!(resolved.get("cips"), Some(&cips)); + assert_eq!(resolved.get("vivado-cmd"), Some(&vivado_cmd)); + assert_eq!(resolved.len(), 2, "{resolved:?}"); + } + + #[test] + fn transitive_dep_resolution_first_seen_wins() { + // entry → A and entry → B, both A and B declare a dep + // `shared` pointing at different paths. Entry's view of + // `shared` is whichever was inserted first; entry itself + // doesn't declare `shared`, so the test just asserts we got + // *one* deterministic answer rather than a panic / duplicate. 
+ let dir = tempfile::tempdir().unwrap(); + let entry = dir.path().join("entry"); + let a = dir.path().join("a"); + let b = dir.path().join("b"); + let shared_v1 = dir.path().join("shared-v1"); + let shared_v2 = dir.path().join("shared-v2"); + for d in [&entry, &a, &b, &shared_v1, &shared_v2] { + std::fs::create_dir_all(d).unwrap(); + } + std::fs::write( + entry.join("vw.toml"), + format!( + "[workspace]\nname=\"entry\"\nversion=\"0.1.0\"\n\n\ + [dependencies.a]\npath = \"{}\"\n\ + [dependencies.b]\npath = \"{}\"\n", + a.display(), + b.display() + ), + ) + .unwrap(); + std::fs::write( + a.join("vw.toml"), + format!( + "[workspace]\nname=\"a\"\nversion=\"0.1.0\"\n\n\ + [dependencies.shared]\npath = \"{}\"\n", + shared_v1.display() + ), + ) + .unwrap(); + std::fs::write( + b.join("vw.toml"), + format!( + "[workspace]\nname=\"b\"\nversion=\"0.1.0\"\n\n\ + [dependencies.shared]\npath = \"{}\"\n", + shared_v2.display() + ), + ) + .unwrap(); + + let entry_utf8 = Utf8PathBuf::from_path_buf(entry).unwrap(); + let resolved = transitive_dep_cache_paths(&entry_utf8).unwrap(); + // `shared` is present exactly once and points at one of the + // two candidates; we don't pin which (HashMap iter order). + let shared = resolved.get("shared").unwrap(); + assert!(*shared == shared_v1 || *shared == shared_v2, "{shared:?}"); + } + + /// Local deps round-trip through serialize/deserialize. 
+ #[test] + fn path_dep_roundtrips() { + let dep = Dependency { + source: DependencySource::Path { + path: PathBuf::from("/some/where"), + }, + src: Vec::new(), + recursive: false, + sim_only: false, + exclude: Vec::new(), + }; + let serialized = toml::to_string(&dep).unwrap(); + let deserialized: Dependency = toml::from_str(&serialized).unwrap(); + assert!(deserialized.is_local()); + assert_eq!(deserialized.local_path(), Some(Path::new("/some/where"))); + } +} diff --git a/vw-quote/Cargo.toml b/vw-quote/Cargo.toml new file mode 100644 index 0000000..f01e952 --- /dev/null +++ b/vw-quote/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vw-quote" +version.workspace = true +edition.workspace = true +license.workspace = true + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1" +syn = { version = "2", features = ["full"] } +quote = "1" + +[dev-dependencies] +vw-htcl = { path = "../vw-htcl" } diff --git a/vw-quote/src/lib.rs b/vw-quote/src/lib.rs new file mode 100644 index 0000000..b724e21 --- /dev/null +++ b/vw-quote/src/lib.rs @@ -0,0 +1,249 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! `quote_htcl!` — generate htcl source code with interpolation. +//! +//! Analogous to the `quote` crate, but for htcl. Takes a string +//! literal of htcl source containing `#(expr)` interpolation markers +//! and produces a `String` of well-formed htcl at runtime. Each +//! interpolated value is passed through [`vw_htcl::emit::ToHtcl`] to +//! choose the right word form (bare vs. quoted vs. …) so generated +//! code is always parseable. +//! +//! ## Why a string literal? +//! +//! Rust's `TokenStream` doesn't preserve newlines, and newlines +//! terminate htcl commands — so a token-walking macro that reads +//! `quote_htcl! { proc x { … } { … } }` directly can't tell where one +//! 
statement ends and the next begins. Taking the input as a string +//! literal keeps the source text exact at zero cost in ergonomics. +//! +//! ## Syntax +//! +//! - `#(expr)` — interpolation slot. `expr` is parsed as a Rust +//! expression at macro time and emitted via +//! `vw_htcl::emit::ToHtcl::to_htcl(&expr)`. +//! - Anything else is literal htcl, copied verbatim except that `{` +//! and `}` in the template don't need any escaping (the macro +//! handles `format!` quoting for you). +//! +//! ## Example +//! +//! ```ignore +//! use vw_quote::quote_htcl; +//! let name = "greet"; +//! let who = "world"; +//! let s = quote_htcl!("\ +//! proc #(name) { +//! #(who) +//! } { puts hi } +//! "); +//! // s == "proc greet {\n world\n} { puts hi }\n" +//! // (the interpolated values get `ToHtcl`-formatted; here both are +//! // bare identifiers, so they emit as-is.) +//! ``` + +use proc_macro::TokenStream; +use proc_macro2::{Span, TokenStream as TokenStream2}; +use quote::quote; +use syn::{parse_macro_input, Expr, LitStr}; + +#[proc_macro] +pub fn quote_htcl(input: TokenStream) -> TokenStream { + expand(input, Dialect::Htcl) +} + +/// Same template grammar as [`quote_htcl!`], but routes interpolated +/// values through [`vw_htcl::emit::ToTcl`] and produces pure Tcl +/// (no htcl-specific attribute handling). Use for compiler-emitted +/// runtime helpers — `repr` procs, `kwargs` shim glue, anything that +/// lives in the Tcl interpreter and should never look like htcl. +/// +/// The split exists so future Tcl-only behavior (typed `Tcl_Obj` +/// handle quoting, etc.) can land on `ToTcl` without changing +/// `quote_htcl!`'s contract. +#[proc_macro] +pub fn quote_tcl(input: TokenStream) -> TokenStream { + expand(input, Dialect::Tcl) +} + +/// Which interpolation trait the macro routes through. The template +/// parsing is shared verbatim — the only thing that differs is the +/// trait + method name used in the generated `format!` arguments. 
+#[derive(Clone, Copy)] +enum Dialect { + Htcl, + Tcl, +} + +fn expand(input: TokenStream, dialect: Dialect) -> TokenStream { + let lit = parse_macro_input!(input as LitStr); + let template_text = lit.value(); + let lit_span = lit.span(); + + let (template, exprs) = match split_template(&template_text, lit_span) { + Ok(parts) => parts, + Err(e) => return e.to_compile_error().into(), + }; + + // Escape literal `{`/`}` so the format string parser leaves them + // alone; replace each `#(…)` site with a positional placeholder. + let format_string = render_format_string(&template, exprs.len()); + let format_lit = LitStr::new(&format_string, Span::call_site()); + + let exprs: Vec = + exprs.into_iter().map(|e| e.to_token_stream()).collect(); + + let out = match dialect { + Dialect::Htcl => quote! {{ + // Bring the trait into scope so `(&expr).to_htcl()` resolves + // without the caller needing to import it. + #[allow(unused_imports)] + use ::vw_htcl::emit::ToHtcl as _; + ::std::format!( + #format_lit, + #( (&{ #exprs }).to_htcl() ),* + ) + }}, + Dialect::Tcl => quote! {{ + #[allow(unused_imports)] + use ::vw_htcl::emit::ToTcl as _; + ::std::format!( + #format_lit, + #( (&{ #exprs }).to_tcl() ),* + ) + }}, + }; + out.into() +} + +// --- template parsing ------------------------------------------------------ + +/// One piece of the parsed template. +enum Piece { + /// Verbatim text from the template. + Text(String), + /// An interpolation site; index into the parallel `exprs` Vec. + Interp, +} + +/// Split the template string into alternating text / interpolation +/// pieces, parsing each `#(…)` body as a Rust expression. 
+fn split_template( + text: &str, + lit_span: Span, +) -> syn::Result<(Vec, Vec)> { + let mut pieces = Vec::new(); + let mut exprs = Vec::new(); + let mut buf = String::new(); + + let bytes = text.as_bytes(); + let mut i = 0; + while i < bytes.len() { + let c = bytes[i]; + // Allow `##` doc comments and `#` plain comments to pass + // through unmolested: interpolation is `#(...)`, so we only + // engage when `#` is immediately followed by `(`. + if c == b'#' && i + 1 < bytes.len() && bytes[i + 1] == b'(' { + if !buf.is_empty() { + pieces.push(Piece::Text(std::mem::take(&mut buf))); + } + let body_end = match find_matching_paren(bytes, i + 1) { + Some(end) => end, + None => { + return Err(syn::Error::new( + lit_span, + "unterminated `#(...)` in quote_htcl! template", + )); + } + }; + // bytes[i+2 .. body_end] is the inside of the parens. + let expr_src = &text[i + 2..body_end]; + let expr: Expr = syn::parse_str(expr_src).map_err(|e| { + syn::Error::new( + lit_span, + format!( + "could not parse interpolation `{expr_src}` as a \ + Rust expression: {e}" + ), + ) + })?; + exprs.push(expr); + pieces.push(Piece::Interp); + i = body_end + 1; + continue; + } + // Push this char (handle UTF-8 boundary by stepping a full + // char rather than a byte). + let ch_start = i; + // Safe because `i` is always at a UTF-8 boundary (we only + // advance by full chars or past ASCII chars we recognized). + let ch = text[ch_start..].chars().next().unwrap(); + buf.push(ch); + i += ch.len_utf8(); + } + if !buf.is_empty() { + pieces.push(Piece::Text(buf)); + } + Ok((pieces, exprs)) +} + +/// Find the byte index of the `)` that matches an opening `(` at +/// `bytes[open]`. Tracks nested parens. Returns `None` on unterminated. 
+fn find_matching_paren(bytes: &[u8], open: usize) -> Option { + debug_assert_eq!(bytes[open], b'('); + let mut depth = 1usize; + let mut i = open + 1; + while i < bytes.len() { + match bytes[i] { + b'(' => depth += 1, + b')' => { + depth -= 1; + if depth == 0 { + return Some(i); + } + } + _ => {} + } + i += 1; + } + None +} + +/// Build the format string passed to `std::format!`, with literal +/// `{`/`}` doubled and `{0}` / `{1}` / … placeholders inserted at each +/// interpolation site. +fn render_format_string(pieces: &[Piece], n_interps: usize) -> String { + let mut out = String::new(); + let mut next_interp = 0usize; + let _ = n_interps; + for piece in pieces { + match piece { + Piece::Text(s) => { + for c in s.chars() { + match c { + '{' => out.push_str("{{"), + '}' => out.push_str("}}"), + other => out.push(other), + } + } + } + Piece::Interp => { + use std::fmt::Write; + write!(out, "{{{}}}", next_interp).unwrap(); + next_interp += 1; + } + } + } + out +} + +trait ToTokenStream { + fn to_token_stream(&self) -> TokenStream2; +} +impl ToTokenStream for Expr { + fn to_token_stream(&self) -> TokenStream2 { + quote! { #self } + } +} diff --git a/vw-quote/tests/basic.rs b/vw-quote/tests/basic.rs new file mode 100644 index 0000000..1e0866f --- /dev/null +++ b/vw-quote/tests/basic.rs @@ -0,0 +1,140 @@ +// Integration tests for `quote_htcl!` and `quote_tcl!`. A proc-macro +// crate can't use its own macros from within `src/`, so these tests +// live in `tests/` and pull `vw-quote` and `vw-htcl` as dev-deps. 
+ +use vw_quote::{quote_htcl, quote_tcl}; + +#[test] +fn literal_passthrough() { + let s = quote_htcl!("puts hi\n"); + assert_eq!(s, "puts hi\n"); +} + +#[test] +fn simple_ident_interpolation() { + let name = "greet"; + let s = quote_htcl!("proc #(name) {} { puts hi }\n"); + assert_eq!(s, "proc greet {} { puts hi }\n"); +} + +#[test] +fn expression_interpolation() { + let width = 16u32; + let s = quote_htcl!("set w #(width)\n"); + assert_eq!(s, "set w 16\n"); +} + +#[test] +fn values_needing_quoting_get_quoted() { + let msg = "hello world"; + let s = quote_htcl!("puts #(msg)\n"); + assert_eq!(s, "puts \"hello world\"\n"); +} + +#[test] +fn dollar_in_value_is_escaped() { + let s = "$x"; + let out = quote_htcl!("puts #(s)\n"); + // The value `$x` has special chars, so it gets quoted with + // `\$` escaped — preserving it as the literal text "$x" at runtime. + assert_eq!(out, "puts \"\\$x\"\n"); +} + +#[test] +fn braces_in_template_pass_through() { + let name = "f"; + let s = quote_htcl!("proc #(name) {} {\n puts hi\n}\n"); + assert_eq!(s, "proc f {} {\n puts hi\n}\n"); +} + +#[test] +fn doc_comment_passes_through() { + let s = quote_htcl!("## A doc comment.\nputs hi\n"); + assert_eq!(s, "## A doc comment.\nputs hi\n"); +} + +#[test] +fn multiple_interpolations() { + let name = "greet"; + let arg = "world"; + let s = quote_htcl!("proc #(name) { #(arg) } { puts hi }\n"); + assert_eq!(s, "proc greet { world } { puts hi }\n"); +} + +#[test] +fn method_call_in_interpolation() { + struct P { + name: &'static str, + } + let p = P { name: "greet" }; + let s = quote_htcl!("proc #(p.name) {} { }\n"); + assert_eq!(s, "proc greet {} { }\n"); +} + +#[test] +fn output_parses_as_valid_htcl() { + let name = "greet"; + let msg = "hi there"; + let s = quote_htcl!("proc #(name) {} {\n puts #(msg)\n}\n"); + let parsed = vw_htcl::parse(&s); + assert!(parsed.errors.is_empty(), "{:?}", parsed.errors); +} + +// --- quote_tcl! 
------------------------------------------------------------- +// +// Mirror the quote_htcl! shape tests. The Tcl-dialect macro shares +// the same template parser, so the same template should produce the +// same output for the cases that overlap (which is most of them +// today — the Tcl/htcl split exists so they can DIVERGE later, not +// because their current behavior differs). + +#[test] +fn tcl_literal_passthrough() { + let s = quote_tcl!("puts hi\n"); + assert_eq!(s, "puts hi\n"); +} + +#[test] +fn tcl_simple_ident_interpolation() { + let name = "greet"; + let s = quote_tcl!("proc #(name) {} { puts hi }\n"); + assert_eq!(s, "proc greet {} { puts hi }\n"); +} + +#[test] +fn tcl_expression_interpolation() { + let width = 16u32; + let s = quote_tcl!("set w #(width)\n"); + assert_eq!(s, "set w 16\n"); +} + +#[test] +fn tcl_values_needing_quoting_get_quoted() { + let msg = "hello world"; + let s = quote_tcl!("puts #(msg)\n"); + assert_eq!(s, "puts \"hello world\"\n"); +} + +#[test] +fn tcl_braces_in_template_pass_through() { + let name = "f"; + let s = quote_tcl!("proc #(name) {} {\n puts hi\n}\n"); + assert_eq!(s, "proc f {} {\n puts hi\n}\n"); +} + +#[test] +fn tcl_multiple_interpolations() { + let name = "greet"; + let arg = "world"; + let s = quote_tcl!("proc #(name) { #(arg) } { puts hi }\n"); + assert_eq!(s, "proc greet { world } { puts hi }\n"); +} + +#[test] +fn tcl_emits_repr_proc_shape() { + // A representative use case from step 2b: emit a per-type + // repr proc body via quote_tcl!. 
+ let mangled = "string"; + let s = quote_tcl!("proc #(mangled)::repr {v} {\n return $v\n}\n"); + assert_eq!(s, "proc string::repr {v} {\n return $v\n}\n"); +} diff --git a/vw-repl/Cargo.toml b/vw-repl/Cargo.toml new file mode 100644 index 0000000..decdb40 --- /dev/null +++ b/vw-repl/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "vw-repl" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "Interactive REPL for htcl scripts, driven by a long-lived Vivado worker" + +[dependencies] +vw-htcl = { path = "../vw-htcl" } +vw-eda = { path = "../vw-eda" } +vw-vivado = { path = "../vw-vivado" } +vw-lib = { path = "../vw-lib" } +camino.workspace = true +ratatui.workspace = true +crossterm.workspace = true +tui-textarea.workspace = true +tokio.workspace = true +thiserror.workspace = true +dirs.workspace = true +futures.workspace = true +tracing.workspace = true +arboard = "3" +base64 = "0.22" +winnow.workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/vw-repl/src/app.rs b/vw-repl/src/app.rs new file mode 100644 index 0000000..ba7641f --- /dev/null +++ b/vw-repl/src/app.rs @@ -0,0 +1,2159 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! REPL state machine + event loop. +//! +//! Single tokio task drives both the ratatui screen and the Vivado +//! worker. Inputs are crossterm key events; outputs are eval results +//! from the worker plus our own scrollback updates. A `tokio::select!` +//! arbitrates the two so neither side blocks the other. +//! +//! A Vivado eval can take seconds to minutes. The UI stays +//! responsive throughout: the input area locks (`eval_in_flight`) +//! but the screen still redraws, the worker's stdout still streams +//! into scrollback as it arrives, and Ctrl-C cancels the in-flight +//! 
eval (sent as a TCL interrupt to the worker). + +use std::time::Duration; + +use crossterm::event::{ + DisableMouseCapture, EnableMouseCapture, Event, KeyCode, KeyEvent, + KeyEventKind, KeyModifiers, MouseButton, MouseEvent, MouseEventKind, +}; +use crossterm::terminal::{ + disable_raw_mode, enable_raw_mode, EnterAlternateScreen, + LeaveAlternateScreen, +}; +use crossterm::ExecutableCommand; +use futures::StreamExt; +use ratatui::backend::CrosstermBackend; +use ratatui::layout::Rect; +use ratatui::Terminal; +use tokio::sync::mpsc; +use tui_textarea::{Input, TextArea}; +use vw_eda::EdaBackend; + +use crate::history::History; +use crate::lower::Origin; +use crate::session::{Session, SessionBatch}; +use crate::ui::{self, WorkerStatusView}; +use crate::{ReplError, ReplOptions}; + +/// What category an entry in the scrollback log belongs to. Drives +/// the per-line gutter prefix and color. +#[derive(Clone, Copy, Debug)] +pub enum ScrollbackKind { + /// Echo of an input the user submitted. + Input, + /// A return value from a successful eval. + Result, + /// Captured stdout from `puts` etc. during an eval. + Stdout, + /// An error — TCL-level or REPL-level. + Error, + /// A pre-flight warning the user should see before the + /// underlying eval result — e.g. "this call uses keyword args + /// but isn't a loaded htcl wrapper." Distinct color from + /// notices so it actually pulls the eye. + Warning, + /// Internal notice (`vivado: ready`, `:load`, `:restart`, etc.). + Notice, +} + +/// Tracks where each echoed top-level statement's Input entry +/// lives in scrollback AND which lowered command-index in the +/// batch is its last. When that command finishes evaluating, the +/// Input entry's timer freezes — giving accurate per-statement +/// durations in multi-statement load batches instead of all +/// entries sharing the whole-batch wall time. 
+#[derive(Clone, Debug)] +struct InputBoundary { + scrollback_idx: usize, + /// Eval-index in `pending_origins` of the last lowered + /// command that originated from this top-level statement. + /// When `pending_eval_index` reaches this value (i.e. the + /// command at this index has just finished), the entry's + /// timer should freeze. + last_command_idx: usize, + /// Set to true once we've stamped this entry's `completed_at`, + /// so we don't re-stamp on subsequent EvalDones. + completed: bool, +} + +#[derive(Clone, Debug)] +pub struct ScrollbackEntry { + pub kind: ScrollbackKind, + pub text: String, + /// When this entry was pushed. Only set for `Input` entries + /// — used by the renderer to right-justify a `Ns` / + /// `M:SS` / `H:MM:SS` elapsed-time marker on the first + /// line. Non-input entries don't get timed and leave this + /// `None`. + pub started_at: Option, + /// When the corresponding eval finished. `None` while the + /// eval is still running (renderer shows live-updating + /// elapsed time from `started_at`); `Some(t)` freezes the + /// timer at the final duration once the batch completes. + pub completed_at: Option, +} + +/// Drag-selection over scrollback rows. Coordinates are `(row, col)` +/// indices into the post-wrap line list (see +/// [`crate::render::wrap_lines`]) — i.e. the same indexing the +/// renderer uses for `Paragraph::scroll`. `anchor` is where the user +/// pressed; `cursor` updates while dragging. The range may be +/// inverted (cursor before anchor) — callers normalize via +/// [`Selection::ordered`] before applying. +#[derive(Clone, Copy, Debug)] +pub struct Selection { + pub anchor: (usize, usize), + pub cursor: (usize, usize), +} + +impl Selection { + /// Return `(start, end)` with `start <= end` so callers don't + /// have to special-case backwards drags. 
+ pub fn ordered(&self) -> ((usize, usize), (usize, usize)) { + if self.anchor <= self.cursor { + (self.anchor, self.cursor) + } else { + (self.cursor, self.anchor) + } + } +} + +#[derive(Clone, Debug)] +pub struct ReverseSearch { + /// The substring the user is searching for. + pub query: String, + /// Index in [`History::entries`] of the current match. + pub match_index: Option, + /// The matched entry's text, cloned for the UI's static lifetime. + pub match_text: String, +} + +pub struct App { + opts: ReplOptions, + input: TextArea<'static>, + history: History, + /// Where the up/down (Ctrl-P/Ctrl-N) history walk is currently + /// positioned. `None` means "composing a fresh entry" — the + /// state any new keypress (other than Ctrl-P/N themselves) + /// drops back into so editing after walking history doesn't + /// keep recalling stale entries. `Some(i)` indexes + /// `History::entries()` directly. + history_cursor: Option, + /// In-progress draft saved when the user first walks back into + /// history with Ctrl-P. Restored on Ctrl-N past the newest + /// entry, so the draft they were typing isn't lost. + history_draft: String, + session: Session, + scrollback: Vec, + scrollback_scroll: u16, + /// Whether the terminal is currently capturing mouse events. + /// Default ON since we implement our own drag-to-select + + /// clipboard copy (see [`Selection`]). F2 toggles it off for + /// users who'd rather use terminal-native selection (which + /// requires capture to be disabled because the protocol is + /// all-or-nothing). + mouse_capture: bool, + /// Last scrollback render area, captured by `ui::draw_scrollback` + /// each frame. Lets mouse handlers map screen coords back to + /// scrollback-local cells without round-tripping through the UI. + scrollback_area: Option, + /// Active drag-selection in scrollback. Coordinates are + /// `(row, col)` indices into the post-wrap scrollback line list + /// — see `render::wrap_lines`. 
`None` outside of an active drag + /// or after a copy completes. + selection: Option, + /// Tail-follow mode. When `true`, the renderer pins the + /// effective scroll offset to the bottom of the wrapped-row + /// list — same model as `tail -f` or a fresh terminal. Manual + /// scroll-up flips this off so the user can read older content + /// without the view jumping out from under them; scrolling + /// back down to the bottom flips it back on. Submitting a new + /// command resets to `true`. + scrollback_follow: bool, + /// The effective scroll offset the renderer used on the most + /// recent frame. Written by `ui::draw_scrollback`, read by the + /// mouse / keyboard scroll handlers so a manual move from + /// tail-follow mode "takes over" the rendered position rather + /// than jumping back to whatever stale value is in + /// `scrollback_scroll`. + last_rendered_scroll: u16, + reverse_search: Option, + worker_state: WorkerState, + worker_tx: mpsc::Sender, + eval_rx: mpsc::UnboundedReceiver, + /// Origins of every command we shipped to the worker in the + /// current batch, in eval order, paired with an index for the + /// next not-yet-acknowledged command. Lets the stream handler + /// tag a mid-eval Vivado warning with `at : + /// in ` even when Vivado bypasses our Tcl-level + /// stack capture (the IP_Flow C++ property validators don't + /// call `::common::send_msg_id`, so neither shim override + /// fires — without this fallback those warnings would arrive + /// stack-less). + pending_origins: Vec, + /// Parallel to `pending_origins`: the expected return type of + /// each shipped command, when statically resolvable. Used by + /// the `EvalDone` handler to (a) skip the heuristic formatter + /// when the value is already type-formatted by the wrapped + /// Tcl, and (b) suppress the Result push entirely for + /// `unit`-typed expressions. 
+ pending_return_types: Vec>, + /// For per-Input-entry timer freezing: one entry per + /// echoed top-level statement in the current batch, in + /// source order. Each carries the scrollback index of its + /// Input entry and the eval-index of the LAST lowered + /// command that came from that statement. When EvalDone + /// fires for that command index, we freeze the entry's + /// timer AND start the next entry's timer (so per-statement + /// durations in a multi-statement load batch are accurate + /// instead of all reading the whole-batch wall time). + pending_input_boundaries: Vec, + pending_eval_index: usize, + /// The batch we shipped to the worker but haven't yet seen a + /// result for. Held aside so a successful eval (and only a + /// successful one) commits to the session — and so the error + /// renderer can look up procs declared in this in-flight + /// batch (which aren't yet in `session`) when drilling into a + /// Tcl stack frame. + pending_batch: Option, + /// Set when `:quit` (or Ctrl-D on an empty buffer) fires, so the + /// outer loop bails out after the current frame. + exit: bool, +} + +enum WorkerState { + Starting, + Ready, + Running, + Down, +} + +/// Commands sent from the UI to the worker task. A batch ships one +/// or more lowered htcl statements; the worker fires `eval` per +/// item, sends one [`WorkerEvent::EvalDone`] per item, and stops at +/// the first failure so we don't keep running a script after it's +/// hit an error. +enum WorkerCmd { + EvalBatch(Vec), + Shutdown, +} + +/// Events sent from the worker task back to the UI. +enum WorkerEvent { + Started, + /// One streaming chunk from the worker, with its source-of- + /// origin tag so the UI can render Vivado WARNING/ERROR lines + /// distinctly from user `puts` output. + Stream { + kind: vw_vivado::StreamKind, + data: String, + }, + /// One item of a batch completed. 
`origin` is the htcl source + /// location the lowered Tcl came from so the renderer can show + /// `file:line` rather than a Tcl stack trace pointing at the + /// shim. `last_in_batch` lets the UI know when to commit to + /// the session document. + EvalDone { + origin: crate::lower::Origin, + result: Result, + last_in_batch: bool, + }, + StartFailed(vw_eda::BackendError), +} + +pub async fn run(opts: ReplOptions) -> Result<(), ReplError> { + enable_raw_mode()?; + let mut stdout = std::io::stdout(); + stdout.execute(EnterAlternateScreen)?; + // Mouse capture ON by default — the app implements its own + // drag-to-select + clipboard copy so users get text selection + // back (helix-style: app-level highlight rendered with + // `Modifier::REVERSED`, copied to the OS clipboard on mouse + // release). F2 toggles capture off if a user would rather use + // their terminal's native selection. + stdout.execute(EnableMouseCapture)?; + let backend = CrosstermBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + let result = run_inner(&mut terminal, opts).await; + + disable_raw_mode()?; + let mut stdout = std::io::stdout(); + // No-op if capture was already disabled via F2. + let _ = stdout.execute(DisableMouseCapture); + stdout.execute(LeaveAlternateScreen)?; + terminal.show_cursor()?; + result +} + +async fn run_inner( + terminal: &mut Terminal>, + opts: ReplOptions, +) -> Result<(), ReplError> { + let (worker_tx, worker_rx) = mpsc::channel::(8); + let (event_tx, eval_rx) = mpsc::unbounded_channel::(); + let verbose = opts.verbose; + // Verbose output can't go to stderr in REPL mode — that's the + // same fd the TUI renders on, so any byte stomps through the + // alternate-screen buffer. Route it to a per-process tempfile + // instead and tell the user where to find it. 
+ let verbose_log_path = if verbose { + Some( + std::env::temp_dir() + .join(format!("vw-repl-vivado-{}.log", std::process::id())), + ) + } else { + None + }; + tokio::spawn(worker_task( + worker_rx, + event_tx, + verbose, + verbose_log_path.clone(), + )); + + let mut app = App::new(opts, worker_tx, eval_rx); + if let Some(p) = verbose_log_path { + app.push( + ScrollbackKind::Notice, + format!( + "verbose output streaming to {} — `tail -f` from \ + another terminal", + p.display() + ), + ); + } + let mut crossterm_events = crossterm::event::EventStream::new(); + + loop { + terminal.draw(|f| ui::draw(f, &mut app))?; + if app.exit { + let _ = app.worker_tx.send(WorkerCmd::Shutdown).await; + return Ok(()); + } + + tokio::select! { + maybe_event = crossterm_events.next() => { + match maybe_event { + Some(Ok(ev)) => app.handle_terminal_event(ev).await, + Some(Err(e)) => { + app.push(ScrollbackKind::Error, format!("terminal: {e}")); + } + None => { + app.exit = true; + } + } + } + Some(event) = app.eval_rx.recv() => { + app.handle_worker_event(event).await; + } + _ = tokio::time::sleep(Duration::from_millis(250)) => { + // Periodic wake: lets the spinner / "starting" status + // animate even when nothing else is happening. + } + } + // Drain additional pending events from BOTH streams + // before the next draw. Without this, a burst of N + // mouse-wheel events or worker stream chunks each + // triggers its own draw — even though only the final + // state matters visually — and the queue bloats faster + // than draws can keep up. + // + // The biased `select!` plus a wildcard always-ready + // branch acts as a non-blocking "is anything pending?" + // check: if neither real branch is immediately ready, + // the wildcard wins and we break out. Capped at 256 + // events per cycle so a sustained event firehose still + // yields back to drawing periodically (the user sees + // forward progress instead of "frozen until the whole + // burst is processed"). 
+ for _ in 0..256 { + let made_progress = tokio::select! { + biased; + Some(maybe_event) = crossterm_events.next() => { + match maybe_event { + Ok(ev) => app.handle_terminal_event(ev).await, + Err(e) => app.push( + ScrollbackKind::Error, + format!("terminal: {e}"), + ), + } + true + } + Some(event) = app.eval_rx.recv() => { + app.handle_worker_event(event).await; + true + } + _ = std::future::ready(()) => false, + }; + if !made_progress { + break; + } + } + } +} + +impl App { + fn new( + opts: ReplOptions, + worker_tx: mpsc::Sender, + eval_rx: mpsc::UnboundedReceiver, + ) -> Self { + let mut input = TextArea::default(); + input.set_cursor_line_style(ratatui::style::Style::default()); + Self { + opts, + input, + history: History::load_default(), + history_cursor: None, + history_draft: String::new(), + session: Session::new(), + scrollback: Vec::new(), + scrollback_scroll: 0, + mouse_capture: true, + scrollback_area: None, + selection: None, + scrollback_follow: true, + last_rendered_scroll: 0, + reverse_search: None, + worker_state: WorkerState::Starting, + worker_tx, + eval_rx, + pending_batch: None, + pending_origins: Vec::new(), + pending_return_types: Vec::new(), + pending_input_boundaries: Vec::new(), + pending_eval_index: 0, + exit: false, + } + } + + // --- queries used by ui.rs --------------------------------------- + + pub fn scrollback(&self) -> &[ScrollbackEntry] { + &self.scrollback + } + pub fn scrollback_scroll(&self) -> u16 { + self.scrollback_scroll + } + pub fn input_mut(&mut self) -> &mut TextArea<'static> { + &mut self.input + } + pub fn input_line_count(&self) -> usize { + self.input.lines().len() + } + pub fn reverse_search(&self) -> Option<&ReverseSearch> { + self.reverse_search.as_ref() + } + pub fn mouse_capture(&self) -> bool { + self.mouse_capture + } + pub fn selection(&self) -> Option { + self.selection + } + /// Called by `ui::draw_scrollback` each frame so subsequent + /// mouse events can translate absolute screen coords into + /// 
scrollback-local rows/cols. + pub fn set_scrollback_area(&mut self, area: Rect) { + self.scrollback_area = Some(area); + } + + pub fn scrollback_follow(&self) -> bool { + self.scrollback_follow + } + + /// Renderer-side writeback: records the scroll offset that was + /// actually used to paint the current frame. Mouse / keyboard + /// scroll handlers anchor their deltas off this so transitioning + /// out of tail-follow doesn't jump back to a stale + /// `scrollback_scroll` value. + pub fn set_last_rendered_scroll(&mut self, offset: u16) { + self.last_rendered_scroll = offset; + } + + /// Toggle terminal mouse capture. Writes the enable/disable + /// sequence directly to stdout — the alternate-screen / raw-mode + /// context that `run()` set up is still active. + fn toggle_mouse_capture(&mut self) { + let mut stdout = std::io::stdout(); + let _ = if self.mouse_capture { + stdout.execute(DisableMouseCapture) + } else { + stdout.execute(EnableMouseCapture) + }; + self.mouse_capture = !self.mouse_capture; + } + pub fn worker_state(&self) -> WorkerStatusView { + match self.worker_state { + WorkerState::Starting => WorkerStatusView::Starting, + WorkerState::Ready => WorkerStatusView::Ready, + WorkerState::Running => WorkerStatusView::Running, + WorkerState::Down => WorkerStatusView::Down, + } + } + pub fn eval_in_flight(&self) -> bool { + matches!(self.worker_state, WorkerState::Running) + } + + /// Whether the parser considers the current input buffer ready + /// to ship. Drives the input-area title and Enter behavior. + pub fn input_is_complete(&self) -> bool { + let buf = self.current_input_text(); + is_buffer_complete(&buf) + } + + fn current_input_text(&self) -> String { + self.input.lines().join("\n") + } + + /// Walk the input history by `delta` (negative = older, + /// positive = newer). Readline-style: first step back from the + /// "composing" position saves the current draft; stepping + /// past the newest entry restores it. Empty history is a no-op. 
+ fn history_step(&mut self, delta: i32) { + let entries = self.history.entries(); + if entries.is_empty() { + return; + } + let cursor = match (self.history_cursor, delta) { + (None, d) if d >= 0 => return, // already at draft, can't go newer + (None, _) => { + // Stepping back from the draft for the first time — + // capture the in-progress text so Ctrl-N past the + // newest entry can restore it. + self.history_draft = self.current_input_text(); + entries.len().saturating_sub(1) + } + (Some(i), d) => { + let new = i as i32 + d; + if new < 0 { + 0 + } else if new >= entries.len() as i32 { + // Past the newest entry — drop back to draft. + self.history_cursor = None; + let draft = std::mem::take(&mut self.history_draft); + self.replace_input_with(&draft); + return; + } else { + new as usize + } + } + }; + self.history_cursor = Some(cursor); + let text = entries[cursor].clone(); + self.replace_input_with(&text); + } + + /// Reset the input buffer to `text`, placing the cursor at the + /// end. Used by history navigation and reverse-search accept. + fn replace_input_with(&mut self, text: &str) { + self.input = TextArea::default(); + for (i, line) in text.lines().enumerate() { + if i > 0 { + self.input.insert_newline(); + } + self.input.insert_str(line); + } + // If `text` ended with a newline, `lines()` drops it; preserve. + if text.ends_with('\n') { + self.input.insert_newline(); + } + } + + // --- event handling --------------------------------------------- + + fn handle_mouse_event(&mut self, mouse: MouseEvent) { + // Wheel events scroll the scrollback buffer. 3 lines per + // notch is the de-facto terminal-emulator default and + // matches what feels natural when you've held the wheel + // for half a second. Keyboard scroll (Ctrl-J/K) still + // jumps 5 — the wheel is finer-grained because the user + // can keep spinning. + // + // Direction: `scrollback_scroll` is ratatui's `scroll.y`, + // which counts lines skipped from the TOP of the buffer. 
+ // Wheel-up should reveal older content above the viewport + // (terminal convention), which means moving the viewport + // UP through the buffer — i.e. SUBTRACTING from + // `scrollback_scroll`. Wheel-down does the reverse. + match mouse.kind { + MouseEventKind::ScrollUp => { + self.scroll_by(-3); + return; + } + MouseEventKind::ScrollDown => { + self.scroll_by(3); + return; + } + _ => {} + } + + // Drag-selection lives within the scrollback area only. + // Outside it, mouse events are ignored — the input box has + // its own selection model via tui-textarea and we don't + // want a click on a status bar to start a scrollback drag. + let Some(area) = self.scrollback_area else { + return; + }; + let in_area = mouse.column >= area.x + && mouse.column < area.x + area.width + && mouse.row >= area.y + && mouse.row < area.y + area.height; + + match mouse.kind { + MouseEventKind::Down(MouseButton::Left) if in_area => { + self.selection = Some(Selection { + anchor: self.cell_to_buffer(mouse.column, mouse.row, area), + cursor: self.cell_to_buffer(mouse.column, mouse.row, area), + }); + } + MouseEventKind::Drag(MouseButton::Left) + if self.selection.is_some() => + { + // Auto-scroll when the drag wanders past the + // top or bottom edge of the scrollback area so + // selections can extend beyond the current + // viewport. Crossterm fires drag events per + // cell of mouse movement, so the user wiggles + // the mouse at the edge to keep scrolling; + // simpler than tracking a "held at edge" timer + // and good enough for selection-extension UX. + let bottom = area.y + area.height; + if mouse.row >= bottom { + self.scroll_by(3); + } else if mouse.row < area.y { + self.scroll_by(-3); + } + // Clamp to the area: dragging outside still + // updates the cursor to the edge so selection + // can extend through the visible viewport even + // when the mouse strays. 
+ let col = mouse + .column + .clamp(area.x, area.x + area.width.saturating_sub(1)); + let row = mouse + .row + .clamp(area.y, area.y + area.height.saturating_sub(1)); + let cursor = self.cell_to_buffer(col, row, area); + if let Some(sel) = self.selection.as_mut() { + sel.cursor = cursor; + } + } + MouseEventKind::Up(MouseButton::Left) => { + if let Some(sel) = self.selection.take() { + self.copy_selection_to_clipboard(sel); + } + } + _ => {} + } + } + + /// Translate a screen cell `(col, row)` inside the scrollback + /// `area` into a `(row, col)` index into the post-wrap line list. + /// The row index is `effective_scroll + (row - area.y)` so the + /// caller doesn't have to know about scroll state. + /// + /// Anchors against `last_rendered_scroll` rather than + /// `scrollback_scroll`. While tail-follow is on the renderer + /// computes the pinned offset on the fly and never writes it + /// back to `scrollback_scroll` — using the stale field here + /// would map mouse clicks to the wrong buffer rows once any + /// real volume of output has scrolled the viewport. + fn cell_to_buffer(&self, col: u16, row: u16, area: Rect) -> (usize, usize) { + let local_row = row.saturating_sub(area.y) as usize; + let local_col = col.saturating_sub(area.x) as usize; + let buf_row = self.last_rendered_scroll as usize + local_row; + (buf_row, local_col) + } + + /// Build the same post-wrap line list the UI renders, extract + /// the cells inside `sel`, and write the resulting plain text to + /// the OS clipboard. Failure (no clipboard backend / Wayland + /// permissions denied / …) surfaces as a Notice line so the + /// user knows the copy didn't go through. 
+ fn copy_selection_to_clipboard(&mut self, sel: Selection) { + let Some(area) = self.scrollback_area else { + return; + }; + let mut flat: Vec> = Vec::new(); + for entry in &self.scrollback { + for line in crate::render::entry_lines(entry, area.width) { + flat.push(line); + } + } + let wrapped = crate::render::wrap_lines(flat, area.width); + let (start, end) = sel.ordered(); + if start == end { + return; // pure click, nothing to copy + } + let mut out = String::new(); + let last_row = end.0.min(wrapped.len().saturating_sub(1)); + for (row_idx, line) in wrapped + .iter() + .enumerate() + .skip(start.0) + .take(last_row + 1 - start.0) + { + let plain = crate::render::line_plain_text(line); + let chars: Vec = plain.chars().collect(); + let row_start = if row_idx == start.0 { start.1 } else { 0 }; + let row_end = if row_idx == end.0 { end.1 } else { chars.len() }; + let row_end = row_end.min(chars.len()); + let row_start = row_start.min(row_end); + for c in &chars[row_start..row_end] { + out.push(*c); + } + if row_idx < end.0 { + out.push('\n'); + } + } + if out.is_empty() { + return; + } + // Primary path: OSC 52. The terminal itself puts the text on + // the system clipboard — no DISPLAY / Wayland socket / + // pbcopy dependency, and it works through SSH. Most modern + // terminals support it (kitty, ghostty, iTerm2, Alacritty, + // Wezterm, recent xterm). Some require an opt-in + // (`set -g set-clipboard on` in tmux, `Allow programs to use + // clipboard` in iTerm2's General → Selection prefs). + // + // Secondary path: arboard. When a real clipboard daemon is + // reachable, this also syncs into the X11/Wayland clipboard + // so other GUI apps see the text. Failures here are silent + // because OSC 52 above is already authoritative — the + // X11-unreachable / Wayland-without-perms case used to + // surface as a noisy "clipboard copy failed" Notice. 
+ send_osc52(&out); + let _ = arboard::Clipboard::new().and_then(|mut c| c.set_text(out)); + } + + async fn handle_terminal_event(&mut self, ev: Event) { + if let Event::Mouse(mouse) = ev { + self.handle_mouse_event(mouse); + return; + } + let Event::Key(key) = ev else { return }; + if !matches!(key.kind, KeyEventKind::Press | KeyEventKind::Repeat) { + return; + } + + if self.reverse_search.is_some() { + self.handle_reverse_search_key(key).await; + return; + } + + match (key.code, key.modifiers) { + (KeyCode::Char('d'), KeyModifiers::CONTROL) => { + if self.input.is_empty() { + self.push(ScrollbackKind::Notice, "exit".to_string()); + self.exit = true; + } + } + (KeyCode::Char('c'), KeyModifiers::CONTROL) => { + // Clear the current input (reedline convention). Once + // we have eval cancellation we'll also kick the + // worker here when an eval is in flight. + self.input = TextArea::default(); + self.history_cursor = None; + self.history_draft.clear(); + } + (KeyCode::F(2), _) => { + // Flip terminal mouse-capture mode. OFF (the default) + // lets the terminal handle text-selection drags + // natively; ON routes wheel events into the app for + // scrollback navigation, at the cost of text + // selection requiring Shift-drag / Option-drag. + self.toggle_mouse_capture(); + } + (KeyCode::Char('r'), KeyModifiers::CONTROL) => { + self.reverse_search = Some(ReverseSearch { + query: String::new(), + match_index: None, + match_text: String::new(), + }); + } + (KeyCode::Char('p'), KeyModifiers::CONTROL) => { + self.history_step(-1); + } + (KeyCode::Char('n'), KeyModifiers::CONTROL) => { + self.history_step(1); + } + // Scrollback nav. PageUp/PageDown for keyboards that + // have them; Ctrl-K (up) / Ctrl-J (down) for compact + // keyboards (Mac laptops, 60% boards) where PageUp + // doesn't exist physically. Vim-style direction + // mapping — `k` is up, `j` is down. Picked over + // Ctrl-↑/↓ because macOS intercepts those (Mission + // Control / app-switching). 
+ // + // Direction: see `handle_mouse_event` — `k`/PageUp + // moves the viewport UP toward older content, which + // means decreasing the y-scroll offset. + (KeyCode::PageUp, _) + | (KeyCode::Char('k'), KeyModifiers::CONTROL) => { + self.scroll_by(-5); + } + (KeyCode::PageDown, _) + | (KeyCode::Char('j'), KeyModifiers::CONTROL) => { + self.scroll_by(5); + } + // Snap to bottom + re-engage tail-follow. Use End + // (when available) or Ctrl-G as the compact-keyboard + // alternative. After scrolling up to inspect old + // output the user explicitly requests "back to live" + // here; we no longer auto-re-engage on every scroll + // (that auto-engage was firing spuriously due to a + // raw-vs-wrapped line-count mismatch, making scroll + // appear dead on large outputs). + (KeyCode::End, _) | (KeyCode::Char('g'), KeyModifiers::CONTROL) => { + self.scrollback_follow = true; + } + (KeyCode::Enter, KeyModifiers::NONE) => { + self.on_submit().await; + } + (KeyCode::Enter, m) + if m.contains(KeyModifiers::ALT) + || m.contains(KeyModifiers::SHIFT) => + { + // Explicit newline regardless of parser + // completeness — escape hatch for "I really want to + // keep typing." + self.input.insert_newline(); + } + _ => { + // Forward everything else to the text editor. + // Once the user starts editing, drop the history + // cursor so subsequent Ctrl-P starts at "newest" + // again — readline behavior: an edited recall is + // a new entry, not a continued walk. 
+ self.history_cursor = None; + let input: Input = key.into(); + let _consumed = self.input.input(input); + } + } + } + + async fn handle_reverse_search_key(&mut self, key: KeyEvent) { + let Some(rs) = self.reverse_search.as_mut() else { + return; + }; + match (key.code, key.modifiers) { + (KeyCode::Esc, _) => { + self.reverse_search = None; + } + (KeyCode::Enter, _) => { + let text = std::mem::take(&mut rs.match_text); + self.reverse_search = None; + if !text.is_empty() { + self.set_input_to(&text); + } + } + (KeyCode::Char('r'), KeyModifiers::CONTROL) => { + let start = rs.match_index; + if let Some((idx, hit)) = + self.history.search_back(&rs.query, start) + { + rs.match_index = Some(idx); + rs.match_text = hit.to_string(); + } + } + (KeyCode::Backspace, _) => { + rs.query.pop(); + self.rerun_reverse_search(); + } + (KeyCode::Char(c), m) + if !m.contains(KeyModifiers::CONTROL) + && !m.contains(KeyModifiers::ALT) => + { + rs.query.push(c); + self.rerun_reverse_search(); + } + _ => {} + } + } + + fn rerun_reverse_search(&mut self) { + let Some(rs) = self.reverse_search.as_mut() else { + return; + }; + match self.history.search_back(&rs.query, None) { + Some((idx, hit)) => { + rs.match_index = Some(idx); + rs.match_text = hit.to_string(); + } + None => { + rs.match_index = None; + rs.match_text.clear(); + } + } + } + + fn set_input_to(&mut self, text: &str) { + let mut ta = TextArea::default(); + ta.set_cursor_line_style(ratatui::style::Style::default()); + for (i, line) in text.split('\n').enumerate() { + if i > 0 { + ta.insert_newline(); + } + ta.insert_str(line); + } + self.input = ta; + } + + async fn on_submit(&mut self) { + let text = self.current_input_text(); + let trimmed = text.trim(); + if trimmed.is_empty() { + return; + } + if !is_buffer_complete(&text) { + self.input.insert_newline(); + return; + } + + self.history.append(&text); + self.push(ScrollbackKind::Input, text.clone()); + + if let Some(cmd) = trimmed.strip_prefix(':') { + 
self.run_meta_command(cmd).await; + } else { + self.dispatch_eval(text).await; + } + + self.input = TextArea::default(); + // Reset history walk: the next Ctrl-P should start from the + // newest entry, not pick up where the previous walk left + // off across submits. + self.history_cursor = None; + self.history_draft.clear(); + // Re-engage tail-follow: every new submit shows the input + // echo + its output at the bottom of the viewport, even + // if the user had scrolled up to inspect earlier results. + // The actual pin happens in the renderer next frame. + self.scrollback_follow = true; + } + + fn resolve_stack_frames(&self, msg: &str) -> String { + resolve_stack_frames( + msg, + &self.session, + self.pending_batch.as_ref(), + self.input_file_for_resolve(), + ) + } + + /// Append a synthetic ` at :` frame to streamed + /// warnings/errors that already arrived without a stack. Vivado + /// emits some message classes (notably `[IP_Flow 19-7090]` + /// "Invalid parameter" warnings during `set_property`) from + /// C++ code paths that bypass the Tcl-level `send_msg_id` + /// override — so the shim never sees them and can't attach a + /// real Tcl call stack. The fallback is "which user command + /// was the worker chewing on when this byte stream arrived," + /// which `pending_origins[pending_eval_index]` gives us. Won't + /// add a frame if the message already has one (the + /// `\n at …` shape from `attach_stack_if_message`) or if it + /// isn't a warning/error severity. 
+ fn tag_streamed_message( + &self, + kind: ScrollbackKind, + msg: String, + ) -> String { + if !matches!(kind, ScrollbackKind::Warning | ScrollbackKind::Error) { + return msg; + } + if msg.contains("\n at ") { + return msg; + } + let Some(origin) = self.pending_origins.get(self.pending_eval_index) + else { + return msg; + }; + let path = match origin.file.as_deref() { + Some(p) => display_path(p), + None => match self.input_file_for_resolve() { + Some(p) => display_path(p), + None => "".into(), + }, + }; + format!("{msg}\n at {path}:{}", origin.line) + } + + /// File to substitute for `` frames in stack traces. + /// Comes from `--load ` for the auto-loaded program — its + /// content was copied verbatim into the lowering scratch, so + /// scratch line N corresponds to load-file line N. For + /// REPL-typed input there's no source file, so callers leave + /// `` as-is. + fn input_file_for_resolve(&self) -> Option<&std::path::Path> { + self.opts.initial_load.as_ref().map(|p| p.as_std_path()) + } + + async fn dispatch_eval(&mut self, text: String) { + self.dispatch_eval_with_echo(text, false).await; + } + + /// Same as [`dispatch_eval`] but echoes each lowered top-level + /// statement as an Input entry first. Used by the `--load` + /// auto-run path so the user can see *which* commands ran when + /// reading the trace, the same way manual REPL input shows up + /// as `› ` for each submit. + async fn dispatch_eval_with_echo(&mut self, text: String, echo: bool) { + if matches!(self.worker_state, WorkerState::Down) { + self.push( + ScrollbackKind::Error, + "vivado worker is down — try :restart".into(), + ); + return; + } + if matches!(self.worker_state, WorkerState::Starting) { + self.push( + ScrollbackKind::Notice, + "queued — vivado still starting".into(), + ); + } + + // Lower htcl → Tcl through the same loader / signature- + // table / call-site-rewrite pipeline `vw run` uses, against + // the workspace whose `vw.toml` lives at or above the cwd. 
+ // A lowering failure (unknown dep, parse error in an + // imported file, etc.) never reaches Vivado. + let cwd = std::env::current_dir().unwrap_or_else(|_| ".".into()); + let lowered = match crate::lower::prepare(&text, &cwd, &self.session) { + Ok(l) => l, + Err(e) => { + // The user cares "did my input run or not" — the + // fact that this came back from the lowering + // pipeline (vs. the Vivado worker) is internal + // accounting. Just say ERROR. + self.push(ScrollbackKind::Error, format!("ERROR: {e}")); + return; + } + }; + + // Surface any pre-flight warnings *before* shipping. If the + // eval then fails, the user already has the context they + // need to interpret the Vivado error. + for w in &lowered.warnings { + let where_ = + render_origin_path(w.origin.file.as_deref(), w.origin.line); + self.push( + ScrollbackKind::Warning, + format!("warning: {where_}: {}", w.message), + ); + } + if lowered.commands.is_empty() { + // Pure `src` import or comments-only input. Commit the + // parsed batch to the session anyway so future + // analyzer queries see the imported procs. + self.session.commit(lowered.batch); + self.push(ScrollbackKind::Notice, "(no Tcl to evaluate)".into()); + return; + } + + // Build per-Input-entry timer boundaries before echo so + // we can map each echoed Input to its last lowered + // command. Empty when not in echo mode (the non-echo + // single-Input case uses the existing + // `mark_inputs_completed` end-of-batch path). + let mut input_boundaries: Vec = Vec::new(); + if echo { + // Push Input entries first, recording their + // scrollback indices for later timer freezing. 
+ for origin in &lowered.entry_top_level { + let idx = self.scrollback.len(); + self.push(ScrollbackKind::Input, origin.snippet.clone()); + input_boundaries.push(InputBoundary { + scrollback_idx: idx, + last_command_idx: 0, // filled below + completed: false, + }); + } + // For each entry-top-level Origin, find the LAST + // lowered command whose ultimate entry-file line + // matches it. A command's "entry line" is the line + // in the entry file it came from: directly when + // `origin.via` is empty (the command lives in the + // entry), or the bottom of the `via` chain (which + // lower.rs documents as "the last frame is the + // entry file / user input"). + for (cmd_idx, cmd) in lowered.commands.iter().enumerate() { + let entry_line = match cmd.origin.via.last() { + Some(f) => f.line, + None => cmd.origin.line, + }; + // Find which entry_top_level Origin this matches + // (linear scan — at most a handful of top-level + // statements per batch). + for (j, top) in lowered.entry_top_level.iter().enumerate() { + if top.line == entry_line { + if let Some(b) = input_boundaries.get_mut(j) { + b.last_command_idx = cmd_idx; + } + break; + } + } + } + // Reset the first entry's `started_at` to NOW — + // ensures the timer is anchored to dispatch time + // (mostly redundant with `push`-time stamping, but + // explicit). Subsequent entries' `started_at` is + // updated as the previous entry completes (see + // EvalDone handler). + if let Some(b) = input_boundaries.first() { + if let Some(entry) = self.scrollback.get_mut(b.scrollback_idx) { + entry.started_at = Some(std::time::Instant::now()); + } + } + } + self.pending_input_boundaries = input_boundaries; + + // Snapshot per-command origins + types for the stream- + // tagging + result-display paths. EvalBatch consumes + // `lowered.commands` below, so we grab both first. 
+ self.pending_origins = + lowered.commands.iter().map(|c| c.origin.clone()).collect(); + self.pending_return_types = lowered + .commands + .iter() + .map(|c| c.expected_return_type.clone()) + .collect(); + self.pending_eval_index = 0; + + // Commit to the session only after every command in the + // batch succeeds (see `handle_worker_event`); a failure + // mid-batch shouldn't pollute the analyzer's view. + let _ = self + .worker_tx + .send(WorkerCmd::EvalBatch(lowered.commands)) + .await; + self.pending_batch = Some(lowered.batch); + self.worker_state = WorkerState::Running; + } + + async fn run_meta_command(&mut self, cmd: &str) { + let mut parts = cmd.splitn(2, char::is_whitespace); + let name = parts.next().unwrap_or(""); + let arg = parts.next().unwrap_or("").trim(); + match name { + "quit" | "q" | "exit" => { + self.exit = true; + } + "restart" => { + self.push( + ScrollbackKind::Notice, + "restart not yet implemented (stubbed for v1)".into(), + ); + } + "load" => { + if arg.is_empty() { + self.push( + ScrollbackKind::Error, + ":load needs a path".into(), + ); + return; + } + match std::fs::read_to_string(arg) { + Ok(content) => { + self.push( + ScrollbackKind::Notice, + format!("loading {arg}"), + ); + self.dispatch_eval(content).await; + } + Err(e) => { + self.push( + ScrollbackKind::Error, + format!("could not read {arg}: {e}"), + ); + } + } + } + other => { + self.push( + ScrollbackKind::Error, + format!("unknown meta-command :{other}"), + ); + } + } + } + + async fn handle_worker_event(&mut self, event: WorkerEvent) { + match event { + WorkerEvent::Started => { + self.worker_state = WorkerState::Ready; + self.push(ScrollbackKind::Notice, "vivado ready".into()); + if let Some(path) = self.opts.initial_load.clone() { + match std::fs::read_to_string(path.as_std_path()) { + Ok(content) => { + self.push( + ScrollbackKind::Notice, + format!("auto-loading {path}"), + ); + self.dispatch_eval_with_echo(content, true).await; + } + Err(e) => { + self.push( + 
ScrollbackKind::Error, + format!("could not read {path}: {e}"), + ); + } + } + } + } + WorkerEvent::StartFailed(e) => { + self.worker_state = WorkerState::Down; + self.push( + ScrollbackKind::Error, + format!("vivado failed to start: {e}"), + ); + } + WorkerEvent::Stream { kind, data } => { + let scrollback_kind = match kind { + vw_vivado::StreamKind::Stdout => ScrollbackKind::Stdout, + vw_vivado::StreamKind::Info => ScrollbackKind::Notice, + vw_vivado::StreamKind::Warning => ScrollbackKind::Warning, + vw_vivado::StreamKind::Error => ScrollbackKind::Error, + }; + // The PTY filter emits one line per chunk and + // the shim's `puts` capture preserves user-side + // newlines; trim a single trailing newline so the + // scrollback's per-entry layout doesn't insert a + // blank gap between Vivado messages. + let trimmed = data.trim_end_matches('\n').to_string(); + if !trimmed.is_empty() { + let resolved = self.resolve_stack_frames(&trimmed); + // Tag warnings/errors that arrived without a + // stack trace with the currently-executing + // user command's origin. Vivado's C++ + // property-validation path emits messages + // straight to the PTY without going through + // `::common::send_msg_id`, so neither shim + // override gets a chance to capture a Tcl + // stack — the best we can do from the + // worker's side is "this happened while + // was running." + let tagged = + self.tag_streamed_message(scrollback_kind, resolved); + self.push(scrollback_kind, tagged); + } + } + WorkerEvent::EvalDone { + origin, + result, + last_in_batch, + } => { + // Grab the return type for THIS command (the one + // that just finished) before we advance the index + // and possibly clear the buffer. + let finished_return_type = self + .pending_return_types + .get(self.pending_eval_index) + .cloned() + .flatten(); + // Capture the just-finished command's eval-index + // before we advance — used to freeze any Input + // entry whose last-command boundary matches it. 
+ let just_finished_idx = self.pending_eval_index; + // Advance past the command that just finished — the + // stream-tagging path uses `pending_origins[index]` + // to label warnings emitted by the *currently* + // executing command, so the index should always + // point at "in-flight," not "just done." + self.pending_eval_index = + self.pending_eval_index.saturating_add(1); + // Per-statement timer freezing: if any echoed + // Input entry's `last_command_idx` matches the + // just-finished command, stamp its + // `completed_at`. If there's a NEXT uncompleted + // boundary, anchor its `started_at` to now so + // its timer starts ticking from this point + // (rather than from batch-dispatch time, which + // would conflate it with the time spent on + // earlier statements). + self.advance_input_timers(just_finished_idx); + if last_in_batch { + self.pending_origins.clear(); + self.pending_return_types.clear(); + self.pending_input_boundaries.clear(); + self.pending_eval_index = 0; + } + match result { + Ok(out) => { + // Drop the per-statement chatter — only the + // last item's value lands in scrollback so a + // `src @vivado-cmd` that runs 851 wrappers + // doesn't drown the user in "ok" lines. The + // intermediate procs etc. are silent unless + // they `puts` something (already streamed). + if last_in_batch { + if !out.stdout.is_empty() { + self.push( + ScrollbackKind::Stdout, + out.stdout + .trim_end_matches('\n') + .to_string(), + ); + } + // Result-rendering policy: + // - `unit`-typed expressions push nothing + // (the value is meaningless by design). + // - Other typed expressions push verbatim + // — the wrapped Tcl already ran the + // type's `repr` proc, so `out.value` + // is the formatted display string. + // - Untyped expressions fall back to the + // legacy heuristic, kept for now while + // the wrapper libraries grow + // annotations. + let suppress = matches!( + finished_return_type.as_ref(), + Some(vw_htcl::TypeExpr::Named { name, .. 
}) + if name == "unit" + ); + if !suppress && !out.value.is_empty() { + let text = if finished_return_type.is_some() { + out.value.clone() + } else { + pretty_kv_list(&out.value) + .unwrap_or_else(|| out.value.clone()) + }; + self.push(ScrollbackKind::Result, text); + } + if let Some(batch) = self.pending_batch.take() { + self.session.commit(batch); + } + self.worker_state = WorkerState::Ready; + // Freeze per-input timers at their + // final duration now that the batch + // has finished evaluating. + self.mark_inputs_completed(); + } + } + Err(err) => { + self.worker_state = WorkerState::Ready; + // Hold the pending batch for the renderer + // — drill-down lookups need its proc map. + // It's cleared below once the trace is + // emitted (a pending batch only outlives a + // single result event). + render_eval_error(self, &origin, err); + self.pending_batch = None; + // Failed evals also freeze their per-input + // timer — otherwise the live counter would + // tick forever on an error result. + self.mark_inputs_completed(); + } + } + } + } + } + + pub(crate) fn push(&mut self, kind: ScrollbackKind, text: String) { + // O(1). The tail-follow pin happens in the renderer (which + // already knows the wrapped-row total for free), not here — + // doing it per-push was O(N) per call, making a long burst + // of Vivado stream chunks O(N²) and freezing the REPL for + // minutes during `src @vivado-cmd` style fan-outs. + // + // Input entries get a start timestamp so the renderer can + // show a per-input timer (live while running, frozen on + // batch completion). Other kinds leave timing unset. + let started_at = if matches!(kind, ScrollbackKind::Input) { + Some(std::time::Instant::now()) + } else { + None + }; + self.scrollback.push(ScrollbackEntry { + kind, + text, + started_at, + completed_at: None, + }); + } + + /// Per-Input-entry timer advance triggered by an EvalDone. 
+ /// If `just_finished_idx` matches any uncompleted boundary's + /// `last_command_idx`, freeze its scrollback entry's + /// `completed_at` and anchor the next uncompleted boundary's + /// `started_at` to NOW so its timer begins fresh rather than + /// inheriting the elapsed time from earlier statements' + /// commands. + fn advance_input_timers(&mut self, just_finished_idx: usize) { + let now = std::time::Instant::now(); + // Find the first uncompleted boundary whose + // last_command_idx matches. Multi-statement load + // batches process commands in order, so the matching + // boundary is always at the head of the uncompleted + // run. + let mut hit_position: Option = None; + for (i, b) in self.pending_input_boundaries.iter().enumerate() { + if b.completed { + continue; + } + if b.last_command_idx == just_finished_idx { + hit_position = Some(i); + } + break; + } + let Some(hit) = hit_position else { return }; + // Mark this boundary complete + stamp its entry. + let scrollback_idx = self.pending_input_boundaries[hit].scrollback_idx; + self.pending_input_boundaries[hit].completed = true; + if let Some(entry) = self.scrollback.get_mut(scrollback_idx) { + if entry.completed_at.is_none() { + entry.completed_at = Some(now); + } + } + // Start the next uncompleted boundary's timer at NOW. + for next in &self.pending_input_boundaries[hit + 1..] { + if next.completed { + continue; + } + let next_idx = next.scrollback_idx; + if let Some(entry) = self.scrollback.get_mut(next_idx) { + entry.started_at = Some(now); + } + break; + } + } + + /// Stamp `completed_at` on every still-running Input entry + /// from the most recent batch. Called from the `EvalDone` + /// handler on `last_in_batch` so the per-input timers freeze + /// at their final duration once the batch has finished + /// evaluating. 
For `--load` echoed batches with multiple + /// Input entries (one per top-level statement) all entries + /// freeze at the same wall time — finer-grained per-statement + /// timing would require carrying the eval-to-input mapping + /// through the worker round-trip, which is more plumbing + /// than the v1 timer needs. + fn mark_inputs_completed(&mut self) { + let now = std::time::Instant::now(); + for entry in self.scrollback.iter_mut().rev() { + if matches!(entry.kind, ScrollbackKind::Input) + && entry.completed_at.is_none() + { + entry.completed_at = Some(now); + } else if entry.completed_at.is_some() + && matches!(entry.kind, ScrollbackKind::Input) + { + // Already-completed Input from a prior batch — + // we've walked past the current batch's inputs. + break; + } + } + } + + /// Apply a signed scroll delta (positive = down toward newer + /// content, negative = up toward older content). Disengages + /// tail-follow when the user scrolls up; re-engages when + /// they scroll down past the bottom — same semantics as a + /// scroll-wheel in a terminal emulator. + /// + /// Anchors the new offset against `last_rendered_scroll` (set + /// by the renderer each frame) rather than `scrollback_scroll`, + /// because while tail-follow is on `scrollback_scroll` is stale + /// — the renderer computes the effective bottom-aligned offset + /// without writing it back to that field. Starting the manual + /// delta from the rendered offset is what lets Ctrl-K from + /// tail-follow mode actually move up by 5 instead of jumping + /// to position 5. + fn scroll_by(&mut self, delta: i32) { + let base = self.last_rendered_scroll as i32; + let new = base.saturating_add(delta).max(0) as u16; + if delta < 0 { + self.scrollback_follow = false; + } + self.scrollback_scroll = new; + // Predictively mirror the new offset into + // `last_rendered_scroll`. 
Without this, drag-to-select + // auto-scrolls but the subsequent `cell_to_buffer` call + // in the same event still uses the previously-rendered + // value — so the selection cursor lags one drag event + // behind the scroll. The renderer will write the + // actually-rendered offset back next frame (which may + // clamp to max_scroll), so this is at worst a one-frame + // optimistic preview. + self.last_rendered_scroll = new; + // No auto-re-engage of tail-follow on scroll. The previous + // logic compared `offset` against a raw `text.lines().count()` + // sum, which dramatically underestimates the wrapped row + // count when entries wrap (a single multi-MB `puts` of a + // nested dict can wrap to tens of thousands of rows while + // contributing one raw line). The underestimate made the + // "are we at the bottom?" threshold fire on any scroll up + // from the bottom, instantly re-engaging follow and snapping + // the viewport back — scroll appeared dead. + // + // Tail-follow re-engages only via explicit user action: an + // `End` or `G` keypress jumps to bottom and reactivates it. + // The cost is that auto-snap-back after new output stops + // being free; the win is that scroll actually works at scale. + } +} + +// --------------------------------------------------------------------- +// Worker task: owns the Vivado backend, serializes evals. +// --------------------------------------------------------------------- + +async fn worker_task( + mut rx: mpsc::Receiver, + tx: mpsc::UnboundedSender, + verbose: bool, + verbose_log: Option, +) { + let backend = vw_vivado::VivadoBackend::spawn(vw_vivado::VivadoConfig { + verbose, + verbose_log, + ..Default::default() + }) + .await; + let mut backend = match backend { + Ok(b) => { + let _ = tx.send(WorkerEvent::Started); + b + } + Err(e) => { + let _ = tx.send(WorkerEvent::StartFailed(e)); + return; + } + }; + + // Stream chunks to the UI as they arrive. 
The closure + // captures the unbounded sender so it can fire without + // awaiting. The kind tag (`StreamKind::Stdout` for user `puts` + // output, `Warning`/`Error`/`Info` for Vivado's own message + // lines harvested from the PTY) flows through unchanged so + // the UI can colour them appropriately. + let stdout_tx = tx.clone(); + backend.set_stdout_sink(move |kind, chunk: &str| { + let _ = stdout_tx.send(WorkerEvent::Stream { + kind, + data: chunk.to_string(), + }); + }); + + while let Some(cmd) = rx.recv().await { + match cmd { + WorkerCmd::EvalBatch(items) => { + let total = items.len(); + for (i, item) in items.into_iter().enumerate() { + let result = backend.eval(&item.tcl).await; + let failed = result.is_err(); + let last_in_batch = i + 1 == total || failed; + let _ = tx.send(WorkerEvent::EvalDone { + origin: item.origin, + result, + last_in_batch, + }); + // Stop the batch at the first failure — running + // the rest of a script after an error confuses + // the user and risks side effects nobody + // intended. + if failed { + break; + } + } + } + WorkerCmd::Shutdown => break, + } + } + let _ = backend.shutdown().await; +} + +// --------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------- + +/// Render a Vivado error as a clean Python-style stack trace — +/// one frame per `file:line` from the outermost `src` the user +/// typed, down through any nested `src` imports, the leaf +/// statement we shipped, and any `(procedure X line N)` frames the +/// Tcl interpreter reported. The error message itself comes last. +/// +/// ```text +/// ip/cips.htcl:1 +/// src @cips +/// ~/src/htcl/amd/cips/module.htcl:3 +/// ip::check -name "xilinx.com:ip:versal_cips:3.4" +/// ~/src/htcl/amd/vivado-cmd/ip.htcl:18 +/// set ip_obj [get_ipdefs -all "$name"] +/// ERROR: [Common 17-53] No open project. ... 
+/// ``` +fn render_eval_error( + app: &mut App, + origin: &crate::lower::Origin, + err: vw_eda::BackendError, +) { + let mut frames: Vec = Vec::new(); + + // Outermost first: walk the `via` chain in reverse so the + // entry `src` lands at the top. + for f in origin.via.iter().rev() { + frames.push(Frame { + file: f.file.clone(), + line: f.line, + snippet: f.snippet.clone(), + }); + } + // Leaf htcl statement — the actual call site that triggered + // the Tcl evaluation. + frames.push(Frame { + file: origin.file.clone(), + line: origin.line, + snippet: origin.snippet.clone(), + }); + + // If Vivado gave us a Tcl trace, drill into any + // `(procedure "X" line N)` frames whose proc we recognize, so + // the user sees the actual failing line inside the proc body + // — not just the call to it. + let (message, code, info, stdout) = match err { + vw_eda::BackendError::Tcl { + message, + code, + info, + stdout, + } => (message, code, info, stdout), + other => { + for frame in &frames { + push_frame(app, frame); + } + app.push(ScrollbackKind::Error, format!("{other}")); + return; + } + }; + if let Some(info) = info.as_deref() { + for tcl_frame in parse_tcl_proc_frames(info) { + // Check the in-flight batch first (the lowering that + // just ran), then fall back to prior session batches. + // This is what gives wrappers declared in earlier + // inputs a real `.htcl` path in the drill-down trace + // instead of an `(input):N` line in a vanished scratch. 
+ let loc = app + .pending_batch + .as_ref() + .and_then(|b| b.procs.get(&tcl_frame.proc)) + .or_else(|| app.session.lookup_proc(&tcl_frame.proc)); + let Some(loc) = loc else { continue }; + let Some((abs_line, content)) = + loc.resolve_body_line(tcl_frame.line) + else { + continue; + }; + frames.push(Frame { + file: loc.file.clone(), + line: abs_line, + snippet: content.trim().to_string(), + }); + } + } + + if !stdout.is_empty() { + app.push( + ScrollbackKind::Stdout, + stdout.trim_end_matches('\n').to_string(), + ); + } + + for frame in &frames { + push_frame(app, frame); + } + app.push(ScrollbackKind::Error, message.trim().to_string()); + if let Some(code) = code.filter(|s| !s.is_empty() && s != "NONE") { + app.push(ScrollbackKind::Notice, format!("({code})")); + } +} + +struct Frame { + file: Option, + line: u32, + snippet: String, +} + +fn push_frame(app: &mut App, frame: &Frame) { + let where_ = render_origin_path(frame.file.as_deref(), frame.line); + app.push(ScrollbackKind::Notice, where_); + if frame.snippet.is_empty() { + return; + } + // Indent every line of the snippet — for a multi-line + // command (`set proj [\n create_project\n -name x\n]`) + // this preserves the user's relative indentation so the + // structure is readable, while the gutter prefix added by + // `entry_lines` distinguishes the first line from the + // continuations. + let body = frame + .snippet + .lines() + .map(|line| format!(" {line}")) + .collect::>() + .join("\n"); + app.push(ScrollbackKind::Notice, body); +} + +/// Parse `(procedure "NAME" line N)` annotations out of Tcl's +/// `$errorInfo`. Returned in the order they appear in `info`, +/// which is innermost-first per Tcl convention — but the renderer +/// wants OUTERMOST-first (we already have the outer leaf frame from +/// the htcl side), so we reverse here and yield the inner frames +/// in execution order. 
+fn parse_tcl_proc_frames(info: &str) -> Vec { + let mut out = Vec::new(); + for line in info.lines() { + let trimmed = line.trim(); + // Expected shape: `(procedure "NAME" line N)` + let Some(rest) = trimmed.strip_prefix("(procedure \"") else { + continue; + }; + let Some((name, rest)) = rest.split_once("\" line ") else { + continue; + }; + let Some(num) = rest.strip_suffix(')') else { + continue; + }; + let Ok(n) = num.parse::() else { continue }; + out.push(TclProcFrame { + proc: name.to_string(), + line: n, + }); + } + // errorInfo lists innermost first; we want execution order + // (outermost first) so reverse. + out.reverse(); + out +} + +struct TclProcFrame { + proc: String, + line: u32, +} + +/// Rewrite `:N in ::procname` frames in a Vivado message to +/// point at the actual htcl source file and line. Delegates the +/// per-line parsing + dedup to [`crate::trace`], which is shared +/// with the `vw run` CLI driver so both surfaces render the same. +/// This wrapper closes over the REPL's session+pending proc lookup. +fn resolve_stack_frames( + msg: &str, + session: &Session, + pending: Option<&SessionBatch>, + input_file: Option<&std::path::Path>, +) -> String { + crate::trace::resolve_stack_frames_with( + msg, + |name| { + pending + .and_then(|b| b.procs.get(name)) + .or_else(|| session.lookup_proc(name)) + .cloned() + }, + input_file, + ) +} + +/// If `text` looks like a Tcl key-value list (an even number of +/// elements where the keys are property-name-shaped), reformat it +/// one pair per line. Returns `None` to mean "leave the original +/// output alone" — the caller falls back to the raw string for +/// scalars, odd-length lists, lists of non-key-shaped tokens, etc. +/// +/// We do this on Tcl return values, where `report_property`-style +/// dicts (`KEY1 VAL1 KEY2 VAL2 …`) are common and unreadable as a +/// single wrapped line. 
+fn pretty_kv_list(text: &str) -> Option { + let elements = tcl_list_split(text.trim())?; + // Heuristic: at least 2 pairs, even count, keys look like + // property names. Two pairs is the minimum where the + // one-per-line layout actually helps — a single pair is fine + // as-is. + if elements.len() < 4 || elements.len() % 2 != 0 { + return None; + } + for chunk in elements.chunks(2) { + if !is_propname_like(&chunk[0]) { + return None; + } + } + let mut out = String::with_capacity(text.len() + elements.len()); + for (i, chunk) in elements.chunks(2).enumerate() { + if i > 0 { + out.push('\n'); + } + out.push_str(&chunk[0]); + out.push(' '); + // Re-brace values that contain whitespace or are empty so + // the displayed line is itself valid Tcl — the user can + // copy any line straight back into a `set` / `dict set` + // call. + let val = &chunk[1]; + if val.is_empty() + || val.chars().any(char::is_whitespace) + || val.contains('"') + { + out.push('{'); + out.push_str(val); + out.push('}'); + } else { + out.push_str(val); + } + } + Some(out) +} + +/// Minimal Tcl-list tokenizer: split on whitespace at the top +/// level, honoring `{…}` grouping with nesting and `\` +/// escapes. Returns `None` on unbalanced braces — caller falls +/// back to the raw string when this happens (better to show +/// something than nothing). Doesn't handle `"…"` grouping because +/// Vivado's list returns never use it; if that changes, add a +/// branch mirroring the brace one. 
+fn tcl_list_split(s: &str) -> Option> { + let mut out = Vec::new(); + let mut chars = s.chars().peekable(); + while let Some(&c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + continue; + } + if c == '{' { + chars.next(); + let mut depth = 1usize; + let mut buf = String::new(); + while let Some(c) = chars.next() { + match c { + '\\' => { + if let Some(esc) = chars.next() { + buf.push(c); + buf.push(esc); + } + } + '{' => { + depth += 1; + buf.push(c); + } + '}' => { + depth -= 1; + if depth == 0 { + break; + } + buf.push(c); + } + _ => buf.push(c), + } + } + if depth != 0 { + return None; + } + out.push(buf); + } else { + let mut buf = String::new(); + while let Some(&c) = chars.peek() { + if c.is_whitespace() { + break; + } + if c == '\\' { + chars.next(); + if let Some(esc) = chars.next() { + buf.push(esc); + } + continue; + } + buf.push(c); + chars.next(); + } + out.push(buf); + } + } + Some(out) +} + +/// "Looks like a property name": ASCII alphanumeric with `_`, `.`, +/// `-`. Used by `pretty_kv_list` to filter out lists-of-arbitrary- +/// strings that just happen to be even-length. Empty strings fail +/// (would render ` ` and look broken). +fn is_propname_like(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '-')) +} + +fn render_origin_path(file: Option<&std::path::Path>, line: u32) -> String { + match file { + Some(p) => format!("{}:{line}", display_path(p)), + None => format!("(input):{line}"), + } +} + +/// Shorten a path for display: drop the cwd prefix when it lines +/// up, leave it absolute otherwise. Saves screen real estate when +/// reporting errors from a dep cached deep under `~/.vw/deps/...`. 
+fn display_path(path: &std::path::Path) -> String { + if let Ok(cwd) = std::env::current_dir() { + if let Ok(rel) = path.strip_prefix(&cwd) { + return rel.display().to_string(); + } + if let Some(home) = dirs::home_dir() { + if let Ok(rel) = path.strip_prefix(&home) { + return format!("~/{}", rel.display()); + } + } + } + path.display().to_string() +} + +/// Decide whether the input buffer parses cleanly enough to ship to +/// Write an OSC 52 set-clipboard escape to stdout, base64-encoding +/// `text` per the protocol. The terminal puts the decoded text on +/// the system clipboard — no DISPLAY/Wayland-socket/pbcopy +/// dependency, and the same code path works over SSH. +/// +/// Some terminals cap the payload size at ~74KB (the original xterm +/// limit) or somewhere similar; selections larger than that may be +/// truncated by the terminal. Encoding/IO errors are swallowed — +/// the caller has nowhere useful to surface them, since OSC 52 is +/// fire-and-forget (the terminal doesn't ack). +fn send_osc52(text: &str) { + use base64::engine::general_purpose::STANDARD; + use base64::Engine; + use std::io::Write; + let encoded = STANDARD.encode(text.as_bytes()); + let payload = format!("\x1b]52;c;{encoded}\x07"); + let mut stdout = std::io::stdout(); + let _ = stdout.write_all(payload.as_bytes()); + let _ = stdout.flush(); +} + +/// Vivado, or whether the user is still in the middle of typing +/// (unterminated brace, etc.). We re-use the htcl parser since +/// it already understands every multi-line construct (procs, +/// `[ … ]` substitutions, braced groups). 
+fn is_buffer_complete(text: &str) -> bool { + let parsed = vw_htcl::parse(text); + !parsed + .errors + .iter() + .any(|e| e.message.contains("unterminated")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn buffer_complete_for_simple_statement() { + assert!(is_buffer_complete("set x 1")); + assert!(is_buffer_complete("puts \"hi\"")); + } + + #[test] + fn buffer_incomplete_with_unterminated_brace() { + assert!(!is_buffer_complete( + "set x [\n create_cpm5\n -name cpm5" + )); + assert!(!is_buffer_complete("proc foo {")); + } + + #[test] + fn buffer_complete_for_multiline_well_formed_proc() { + assert!(is_buffer_complete( + "proc foo {\n @default(1) x\n} {\n puts $x\n}" + )); + } + + // --- stack-frame resolution --------------------------------- + + use crate::lower::ProcLocation; + use crate::session::SessionBatch; + use std::collections::HashMap; + use std::path::PathBuf; + use vw_htcl::{parse, LoadedProgram}; + + fn session_with_proc( + proc: &str, + file: PathBuf, + body_start_line: u32, + body_lines: Vec, + ) -> Session { + // Session stores proc names without the leading `::` — + // see `lower::qualify`. 
+ let key = proc.strip_prefix("::").unwrap_or(proc); + let src = format!("proc {key} {{}} {{}}\n"); + let parsed = parse(&src); + let mut procs = HashMap::new(); + procs.insert( + key.to_string(), + ProcLocation { + file: Some(file), + body_start_line, + body_lines, + }, + ); + let batch = SessionBatch { + program: LoadedProgram { + source: src, + files: Vec::new(), + regions: Vec::new(), + }, + document: parsed.document, + procs, + }; + let mut s = Session::new(); + s.commit(batch); + s + } + + #[test] + fn rewrite_resolves_input_line_to_absolute_file_line() { + let session = session_with_proc( + "::configure_cips", + "ip/cips.htcl".into(), + 95, + (0..30).map(|i| format!("body line {i}")).collect(), + ); + let frame = crate::trace::rewrite_stack_line( + " at :14 in ::configure_cips", + |name| session.lookup_proc(name).cloned(), + None, + ) + .expect("should resolve"); + // body line 14 = body_start_line (95) + (14 - 1) = 108 + assert!( + frame.formatted.contains("ip/cips.htcl:108"), + "got {:?}", + frame.formatted + ); + assert_eq!(frame.line, 108); + } + + #[test] + fn rewrite_resolves_namespaced_proc() { + // Tcl reports `::port::plumb_if_pin` (with leading `::`) + // but the session indexes it as `port::plumb_if_pin`. 
+ let session = session_with_proc( + "::port::plumb_if_pin", + "vivado-cmd/port.htcl".into(), + 70, + (0..10).map(|i| format!("line {i}")).collect(), + ); + let frame = crate::trace::rewrite_stack_line( + " at :5 in ::port::plumb_if_pin", + |name| session.lookup_proc(name).cloned(), + None, + ) + .expect("should resolve namespaced proc"); + assert!( + frame.formatted.contains("vivado-cmd/port.htcl:74"), + "got {:?}", + frame.formatted + ); + } + + #[test] + fn rewrite_passes_unknown_proc_through() { + let session = Session::new(); + assert!(crate::trace::rewrite_stack_line( + " at :14 in ::vivado_builtin_thing", + |name| session.lookup_proc(name).cloned(), + None, + ) + .is_none()); + } + + #[test] + fn rewrite_skips_non_frame_lines() { + let session = Session::new(); + assert!(crate::trace::rewrite_stack_line( + "WARNING: [Common 17-1] something", + |name| session.lookup_proc(name).cloned(), + None, + ) + .is_none()); + assert!(crate::trace::rewrite_stack_line( + "", + |name| session.lookup_proc(name).cloned(), + None, + ) + .is_none()); + } + + #[test] + fn resolve_dedupes_adjacent_same_proc_frames() { + // Two consecutive `:N in ::port::plumb_if_pin` frames + // resolving to the same absolute line should collapse to one. + let session = session_with_proc( + "::port::plumb_if_pin", + "vivado-cmd/port.htcl".into(), + 70, + (0..10).map(|i| format!("line {i}")).collect(), + ); + let msg = "\ +WARNING: [port::plumb_if_pin-1] skipping foo + at :5 in ::port::plumb_if_pin + at :5 in ::port::plumb_if_pin"; + let out = resolve_stack_frames(msg, &session, None, None); + // Only one resolved frame line should remain. 
+ let count = out + .lines() + .filter(|l| l.contains("port::plumb_if_pin")) + .count(); + assert_eq!(count, 2, "got:\n{out}"); // header + 1 frame + } + + // --- pretty kv list ----------------------------------------- + + #[test] + fn tcl_list_split_handles_braces_and_nesting() { + assert_eq!( + tcl_list_split("a b c d").unwrap(), + vec!["a", "b", "c", "d"] + ); + assert_eq!( + tcl_list_split("KEY {nested value} OTHER 1").unwrap(), + vec!["KEY", "nested value", "OTHER", "1"] + ); + assert_eq!( + tcl_list_split("OUTER {INNER {DEEP value}} END 2").unwrap(), + vec!["OUTER", "INNER {DEEP value}", "END", "2"] + ); + // Unbalanced braces → None. + assert!(tcl_list_split("a {b c").is_none()); + } + + #[test] + fn pretty_kv_list_breaks_pairs_onto_lines() { + let s = "CLASS bd_cell NAME cips PATH /cips"; + let out = pretty_kv_list(s).unwrap(); + assert_eq!(out, "CLASS bd_cell\nNAME cips\nPATH /cips"); + } + + #[test] + fn pretty_kv_list_rebraces_values_with_whitespace() { + let s = "ALLOWED_SIM_MODELS {tlm rtl} CLASS bd_cell COMBINED rtl_tlm"; + let out = pretty_kv_list(s).unwrap(); + assert_eq!( + out, + "ALLOWED_SIM_MODELS {tlm rtl}\nCLASS bd_cell\nCOMBINED rtl_tlm" + ); + } + + #[test] + fn pretty_kv_list_declines_non_kv_lists() { + // Odd-length: not a dict. + assert!(pretty_kv_list("a b c").is_none()); + // Two elements: declined (single pair gains nothing from + // reflow). + assert!(pretty_kv_list("a b").is_none()); + // Non-propname keys: looks more like prose than a dict. + assert!(pretty_kv_list("hello world foo bar").is_some()); + // … but the same elements with one non-propname key fail. + assert!(pretty_kv_list("hello world foo! bar").is_none()); + } +} diff --git a/vw-repl/src/highlight.rs b/vw-repl/src/highlight.rs new file mode 100644 index 0000000..33c46f5 --- /dev/null +++ b/vw-repl/src/highlight.rs @@ -0,0 +1,312 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Syntax highlighter for compiler-emitted enum reprs. +//! +//! Reprs follow a uniform shape regardless of which enum produced +//! them (the compiler emits `Variant`, `Variant(payload)`, or +//! `Variant(\n inner\n)` for any user-declared enum, and +//! dict/list reprs join entries with `\n`). This module recognizes +//! that shape line-by-line and emits styled +//! [`ratatui::text::Span`]s — keys in blue, variant names in teal, +//! punctuation in dim, scalar payloads in green. +//! +//! Shape-based, not name-based: the highlighter has no knowledge +//! of `Property` / `Properties` / any specific enum. It recognizes +//! the structural pattern (`IDENT '(' … ')'` for variant calls, +//! `KEY SP VARIANT …` for dict entries, bare `)` for multi-line +//! close), so adding a new enum to the htcl source automatically +//! gets the same highlighting on its repr output. +//! +//! Falls back to plain text when a line doesn't parse — non-repr +//! content (raw `puts` output, error messages, etc.) renders +//! normally. +//! +//! Color palette is exported so [`crate::render::entry_lines`] +//! can apply the same fallback `body_style` for unparsed runs. + +use ratatui::style::{Color, Modifier, Style}; +use ratatui::text::Span; +use winnow::ascii::space0; +use winnow::combinator::repeat; +use winnow::error::ContextError; +use winnow::token::take_while; +use winnow::{ModalResult, Parser}; + +/// Style for dict keys (`CONFIG`, `CPM_PCIE0_MODES`, …) — the +/// identifier immediately preceding a value. +pub fn key_style() -> Style { + Style::default().fg(Color::Rgb(80, 150, 255)) +} + +/// Style for enum variant names (`Scalar`, `Nested`, …) — the +/// identifier immediately preceding `(`. 
+pub fn variant_style() -> Style { + Style::default().fg(Color::Rgb(100, 200, 200)) +} + +/// Style for structural punctuation (`(` and `)`) — dimmed so the +/// nesting structure recedes visually next to keys and values. +pub fn punct_style() -> Style { + Style::default().add_modifier(Modifier::DIM) +} + +/// Style for scalar payloads — the string inside `Scalar(…)`. +pub fn scalar_style() -> Style { + Style::default().fg(Color::Rgb(120, 200, 120)) +} + +/// Try to recognize `line` as a compiler-emitted enum-repr line +/// and return a styled span sequence. Returns `None` when the +/// line doesn't match the repr grammar — caller falls back to +/// rendering the raw text with its default body style. +pub fn highlight_line(line: &str) -> Option>> { + let mut input = line; + parse_line.parse_next(&mut input).ok().filter(|spans| { + // Reject parses that didn't consume the whole line — a + // partial match means we'd silently style some tokens + // and drop the rest. Better to fall through to plain. + input.is_empty() && !spans.is_empty() + }) +} + +// Top-level line shapes: +// `INDENT? ')' [SP] EOL` — multi-line close +// `INDENT? KEY SP VALUE [SP]? EOL` — dict entry +fn parse_line(input: &mut &str) -> ModalResult>> { + let mut spans: Vec> = Vec::new(); + let indent = space0::<_, ContextError>.parse_next(input)?; + if !indent.is_empty() { + spans.push(Span::raw(indent.to_string())); + } + // Multi-line-close line: bare `)`, optionally followed by trailing whitespace. 
+ if input.starts_with(')') { + let close = ")"; + *input = &input[1..]; + spans.push(Span::styled(close.to_string(), punct_style())); + let trailing = space0::<_, ContextError>.parse_next(input)?; + if !trailing.is_empty() { + spans.push(Span::raw(trailing.to_string())); + } + return Ok(spans); + } + // Dict-entry line: KEY SP VALUE + let key = parse_ident(input)?; + spans.push(Span::styled(key.to_string(), key_style())); + let sp = take_while(1.., |c: char| c == ' ').parse_next(input)?; + spans.push(Span::raw(sp.to_string())); + let value_spans = parse_value(input)?; + spans.extend(value_spans); + Ok(spans) +} + +// VALUE is one of: +// IDENT '(' INNER ')' — single-line variant call with payload +// IDENT '(' — multi-line open (line ends after `(`) +// IDENT — empty-payload variant (rare) +fn parse_value(input: &mut &str) -> ModalResult>> { + let variant = parse_ident(input)?; + let mut out = vec![Span::styled(variant.to_string(), variant_style())]; + if !input.starts_with('(') { + // Empty-payload variant — variant name alone (e.g. a + // bare `North` from `enum Direction = {North; South}`). + return Ok(out); + } + *input = &input[1..]; + out.push(Span::styled("(".to_string(), punct_style())); + if input.is_empty() { + // `Variant(` at end of line — multi-line open. The + // closing `)` will appear on a later line and be matched + // by the close-only branch in `parse_line`. + return Ok(out); + } + // Inline payload. The payload is everything up to the + // matching close paren, with `(`/`)` balanced. Could be: + // - a scalar string (no inner parens): color green + // - a sub-entry KEY VARIANT(...) [SP KEY VARIANT(...)]*: recurse + let payload_spans = parse_inline_payload(input)?; + out.extend(payload_spans); + if input.starts_with(')') { + *input = &input[1..]; + out.push(Span::styled(")".to_string(), punct_style())); + } + Ok(out) +} + +// Inline payload between `(` and its matching `)`. 
Recognizes +// either a single scalar (text with no parens) or a sequence of +// inline dict-entry-shaped sub-values (`KEY VARIANT(...) ...`). +// Stops at the closing `)` of the surrounding call. +fn parse_inline_payload(input: &mut &str) -> ModalResult>> { + // Look ahead: does the payload look like `IDENT SP IDENT (`? + // If so it's a sub-entry — recurse. Otherwise treat it as a + // scalar value. + if looks_like_sub_entry(input) { + let mut out = Vec::new(); + // First sub-entry. + let entry = parse_sub_entry(input)?; + out.extend(entry); + // Optional further sub-entries separated by space (for + // dicts with multiple inline children). + let more: Vec>> = repeat( + 0.., + ( + take_while(1.., |c: char| c == ' ') + .map(|s: &str| Span::raw(s.to_string())), + parse_sub_entry, + ) + .map(|(sp, mut e)| { + e.insert(0, sp); + e + }), + ) + .parse_next(input)?; + for chunk in more { + out.extend(chunk); + } + Ok(out) + } else { + // Scalar payload: take everything up to the next `)`. + let scalar = take_while(0.., |c: char| c != ')').parse_next(input)?; + Ok(vec![Span::styled(scalar.to_string(), scalar_style())]) + } +} + +// A sub-entry inside an inline payload: KEY SP VARIANT [( ... )]. +fn parse_sub_entry(input: &mut &str) -> ModalResult>> { + let mut out = Vec::new(); + let key = parse_ident(input)?; + out.push(Span::styled(key.to_string(), key_style())); + let sp = take_while(1.., |c: char| c == ' ').parse_next(input)?; + out.push(Span::raw(sp.to_string())); + let value_spans = parse_value(input)?; + out.extend(value_spans); + Ok(out) +} + +// Best-effort lookahead: does the input start with `IDENT SP IDENT` +// (which would indicate a KEY SP VARIANT sub-entry rather than a +// bare scalar payload)? Doesn't consume input. 
fn looks_like_sub_entry(input: &&str) -> bool {
    fn is_ident_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '_'
    }

    let s: &str = input;
    // KEY must start like an identifier (letter or underscore)…
    if !s.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_') {
        return false;
    }
    // …and run, identifier characters only, up to the first space.
    // No space at all means there is no second token to look at.
    let Some((key, rest)) = s.split_once(' ') else {
        return false;
    };
    if !key.chars().all(is_ident_char) {
        return false;
    }
    // A sub-entry's second ident is the variant name — it must be
    // non-empty and immediately followed by `(` to count. Identifier
    // characters are ASCII, so the byte offset returned by `find`
    // is also the identifier's length.
    match rest.find(|c: char| !is_ident_char(c)) {
        Some(end) => end > 0 && rest[end..].starts_with('('),
        None => false,
    }
}

// `[A-Za-z_][A-Za-z0-9_]*` — take identifier-shaped chars then
// verify the leading character is letter/underscore (we can't
// match the leading-letter constraint and the run cleanly with a
// single `take_while`, but it's fine to take everything plausible
// then reject if the leading char would have made it digit-led).
+fn parse_ident<'a>(input: &mut &'a str) -> ModalResult<&'a str> { + let ident = + take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_') + .parse_next(input)?; + match ident.chars().next() { + Some(c) if c.is_ascii_alphabetic() || c == '_' => Ok(ident), + _ => Err(winnow::error::ErrMode::Backtrack(ContextError::new())), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn close_only_line() { + let spans = highlight_line(")").expect("parses"); + assert!(!spans.is_empty()); + // Last span content is ")" + let last = &spans[spans.len() - 1]; + assert_eq!(last.content.as_ref(), ")"); + } + + #[test] + fn indented_close_line() { + let spans = highlight_line(" )").expect("parses"); + // First span = " " (indent), last span = ")" + assert_eq!(spans[0].content.as_ref(), " "); + assert_eq!(spans.last().unwrap().content.as_ref(), ")"); + } + + #[test] + fn simple_scalar_entry() { + let spans = highlight_line("CONFIG Scalar(foo)").expect("parses"); + // Should have spans for CONFIG, " ", Scalar, "(", foo, ")" + let contents: Vec<&str> = + spans.iter().map(|s| s.content.as_ref()).collect(); + assert!( + contents.contains(&"CONFIG"), + "missing CONFIG span: {contents:?}" + ); + assert!( + contents.contains(&"Scalar"), + "missing Scalar span: {contents:?}" + ); + assert!(contents.contains(&"foo"), "missing foo span: {contents:?}"); + } + + #[test] + fn variant_open_multiline() { + let spans = highlight_line("CONFIG Nested(").expect("parses"); + let contents: Vec<&str> = + spans.iter().map(|s| s.content.as_ref()).collect(); + assert!(contents.contains(&"CONFIG")); + assert!(contents.contains(&"Nested")); + assert_eq!(spans.last().unwrap().content.as_ref(), "("); + } + + #[test] + fn nested_inline_entry() { + let spans = + highlight_line("CONFIG Nested(CPM_PCIE0_MODES Scalar(None))") + .expect("parses"); + let contents: Vec<&str> = + spans.iter().map(|s| s.content.as_ref()).collect(); + assert!(contents.contains(&"CONFIG")); + 
assert!(contents.contains(&"Nested")); + assert!(contents.contains(&"CPM_PCIE0_MODES")); + assert!(contents.contains(&"Scalar")); + assert!(contents.contains(&"None")); + } + + #[test] + fn non_repr_line_returns_none() { + assert!(highlight_line("INFO: vivado started").is_none()); + assert!(highlight_line("just some random text").is_none()); + assert!(highlight_line("").is_none()); + } +} diff --git a/vw-repl/src/history.rs b/vw-repl/src/history.rs new file mode 100644 index 0000000..ed6ad72 --- /dev/null +++ b/vw-repl/src/history.rs @@ -0,0 +1,236 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Persistent input history with incremental search. +//! +//! Entries are appended to a newline-delimited file (one entry per +//! line, with embedded newlines escaped) under the platform's state +//! dir — typically `~/.local/state/vw/repl-history` on Linux. The +//! file is loaded once at startup; new entries are appended both to +//! memory and to the file as soon as they're recorded so a crashed +//! session doesn't lose history. +//! +//! Ctrl-R triggers an *incremental* search: as the user types, we +//! find the most recent entry whose text contains the query as a +//! substring (case-insensitive). Repeated Ctrl-R steps to the next- +//! older match. Esc cancels; Enter accepts the match into the input +//! buffer. + +use std::fs::{create_dir_all, OpenOptions}; +use std::io::{BufRead, BufReader, Write}; +use std::path::PathBuf; + +const ESCAPED_NEWLINE: &str = "\\n"; +const ESCAPED_BACKSLASH: &str = "\\\\"; + +/// In-memory history, backed by an on-disk file. Indexed +/// most-recent-last; `entries[entries.len() - 1]` is the freshest +/// record, matching how Readline / Reedline order things. 
+#[derive(Debug)] +pub struct History { + file_path: PathBuf, + entries: Vec, +} + +impl History { + /// Load history from the default location. Returns an empty + /// store (and skips disk writes) when no state dir is available + /// — the REPL still runs, just without persistence. + pub fn load_default() -> Self { + let path = default_history_path(); + match path { + Some(p) => Self::load_from(p), + None => Self { + file_path: PathBuf::new(), + entries: Vec::new(), + }, + } + } + + /// Load history from a specific file. Missing file → empty + /// history (the file gets created on first append). + pub fn load_from(file_path: PathBuf) -> Self { + let entries = read_entries(&file_path).unwrap_or_default(); + Self { file_path, entries } + } + + #[allow(dead_code)] // public API for the in-progress completion slice + pub fn entries(&self) -> &[String] { + &self.entries + } + + /// Append `entry` to the in-memory log and persist it. Empty or + /// whitespace-only entries are ignored. An entry identical to + /// the most recent one is also ignored (the common case of + /// re-running the same command shouldn't bloat the file). + pub fn append(&mut self, entry: &str) { + let trimmed = entry.trim(); + if trimmed.is_empty() { + return; + } + if self.entries.last().map(String::as_str) == Some(entry) { + return; + } + self.entries.push(entry.to_string()); + if self.file_path.as_os_str().is_empty() { + return; + } + if let Some(parent) = self.file_path.parent() { + let _ = create_dir_all(parent); + } + if let Ok(mut f) = OpenOptions::new() + .create(true) + .append(true) + .open(&self.file_path) + { + let _ = writeln!(f, "{}", encode_line(entry)); + } + } + + /// Find the most recent entry whose text contains `query` as a + /// substring (case-insensitive). Returns the index in + /// [`Self::entries`] plus the entry itself. 
`start_before` is an + /// exclusive upper bound — passing `Some(prev_idx)` resumes the + /// search at the next-older entry, which is how repeated + /// `Ctrl-R` steps backward. + pub fn search_back( + &self, + query: &str, + start_before: Option, + ) -> Option<(usize, &str)> { + if query.is_empty() { + return None; + } + let upper = start_before.unwrap_or(self.entries.len()); + let needle = query.to_lowercase(); + for i in (0..upper).rev() { + if self.entries[i].to_lowercase().contains(&needle) { + return Some((i, self.entries[i].as_str())); + } + } + None + } +} + +fn default_history_path() -> Option { + let state = dirs::state_dir().or_else(dirs::data_local_dir)?; + Some(state.join("vw").join("repl-history")) +} + +fn read_entries(path: &PathBuf) -> Option> { + let f = std::fs::File::open(path).ok()?; + let mut entries = Vec::new(); + for line in BufReader::new(f).lines().map_while(Result::ok) { + entries.push(decode_line(&line)); + } + Some(entries) +} + +fn encode_line(s: &str) -> String { + // Single-line newline-delimited file format: backslashes and + // embedded newlines get a literal escape so a multi-line htcl + // buffer round-trips cleanly. 
+ let mut out = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '\\' => out.push_str(ESCAPED_BACKSLASH), + '\n' => out.push_str(ESCAPED_NEWLINE), + other => out.push(other), + } + } + out +} + +fn decode_line(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut chars = s.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('n') => out.push('\n'), + Some('\\') => out.push('\\'), + Some(other) => { + out.push('\\'); + out.push(other); + } + None => out.push('\\'), + } + } else { + out.push(c); + } + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Read; + + #[test] + fn append_persists_and_dedupes_consecutive_duplicates() { + let dir = tempfile::tempdir().unwrap(); + let p = dir.path().join("h"); + let mut h = History::load_from(p.clone()); + h.append("foo"); + h.append("foo"); + h.append("bar"); + h.append(""); + h.append(" "); + assert_eq!(h.entries(), &["foo".to_string(), "bar".to_string()]); + // File round-trips. 
+ let mut buf = String::new(); + std::fs::File::open(&p) + .unwrap() + .read_to_string(&mut buf) + .unwrap(); + assert_eq!(buf, "foo\nbar\n"); + } + + #[test] + fn multiline_entries_round_trip_with_escapes() { + let dir = tempfile::tempdir().unwrap(); + let p = dir.path().join("h"); + { + let mut h = History::load_from(p.clone()); + h.append("set x [\n create_cpm5 -name cpm5\n]"); + h.append("with \\ backslash"); + } + let h2 = History::load_from(p); + assert_eq!( + h2.entries(), + &[ + "set x [\n create_cpm5 -name cpm5\n]".to_string(), + "with \\ backslash".to_string(), + ] + ); + } + + #[test] + fn search_back_finds_most_recent_match() { + let dir = tempfile::tempdir().unwrap(); + let mut h = History::load_from(dir.path().join("h")); + h.append("set x 1"); + h.append("create_project foo"); + h.append("set y 2"); + let (idx, hit) = h.search_back("set", None).unwrap(); + assert_eq!(hit, "set y 2"); + assert_eq!(idx, 2); + // Step to the next older. + let (idx2, hit2) = h.search_back("set", Some(idx)).unwrap(); + assert_eq!(hit2, "set x 1"); + assert_eq!(idx2, 0); + // Nothing older. + assert!(h.search_back("set", Some(idx2)).is_none()); + } + + #[test] + fn search_is_case_insensitive() { + let dir = tempfile::tempdir().unwrap(); + let mut h = History::load_from(dir.path().join("h")); + h.append("Create_Project foo"); + let (_, hit) = h.search_back("create", None).unwrap(); + assert_eq!(hit, "Create_Project foo"); + } +} diff --git a/vw-repl/src/lib.rs b/vw-repl/src/lib.rs new file mode 100644 index 0000000..c534a03 --- /dev/null +++ b/vw-repl/src/lib.rs @@ -0,0 +1,66 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Interactive REPL for htcl scripts. +//! +//! A ratatui-driven shell that talks to a long-lived Vivado worker +//! via [`vw_vivado::VivadoBackend`]. The session document model +//! 
(every successful eval appended to an in-memory script + the +//! current input as its tail) lets the analyzer power the same +//! features the LSP gives editors — completion, hover, signature +//! help — without any REPL-specific machinery. +//! +//! v1 (this slice) ships the foundation: screen layout, multi-line +//! input with Readline-quality editing, persistent history with +//! Ctrl-R search, a long-lived Vivado worker, and `:load `. +//! Tab completion, signature help, hover overlay, command palette, +//! and structured-result rendering layer on top in subsequent +//! slices. + +mod app; +mod highlight; +mod history; +pub mod lower; +mod render; +mod session; +pub mod trace; +mod ui; + +use camino::Utf8PathBuf; +use thiserror::Error; + +pub use app::App; +pub use lower::{build_proc_locations, Origin, OriginFrame, ProcLocation}; +pub use session::Session; +pub use trace::{ + display_path, resolve_stack_frames_with, rewrite_stack_line, RewrittenFrame, +}; + +#[derive(Debug, Error)] +pub enum ReplError { + #[error("terminal I/O: {0}")] + Io(#[from] std::io::Error), + #[error("backend: {0}")] + Backend(#[from] vw_eda::BackendError), +} + +/// Tunable knobs supplied by the CLI invocation. +#[derive(Clone, Debug, Default)] +pub struct ReplOptions { + /// Forward Vivado's banner / info messages to the scrollback + /// rather than swallowing them. Useful for diagnosing a slow or + /// misbehaving worker; off by default to keep the log focused + /// on the user's evals. + pub verbose: bool, + /// If set, source this file into the session immediately after + /// the Vivado worker comes up. Equivalent to typing `:load + /// ` as the first input. + pub initial_load: Option, +} + +/// Run the REPL until the user exits. Owns the terminal alternate +/// screen for the duration; restores it on every exit path. 
+pub async fn run(opts: ReplOptions) -> Result<(), ReplError> { + app::run(opts).await +} diff --git a/vw-repl/src/lower.rs b/vw-repl/src/lower.rs new file mode 100644 index 0000000..37bf739 --- /dev/null +++ b/vw-repl/src/lower.rs @@ -0,0 +1,1588 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Lower a REPL input buffer to a sequence of `(htcl-origin, Tcl)` +//! commands the Vivado worker can evaluate one at a time. +//! +//! Shipping one statement per `eval` (rather than a single +//! concatenated script) is what lets us render Vivado errors against +//! htcl source. The loader's [`vw_htcl::LoadedProgram::locate_span`] +//! tells us which `.htcl` file each top-level statement came from; +//! we keep that mapping alongside the lowered Tcl so the REPL can +//! report `× : ` instead of a Tcl stack +//! trace pointing into our shim. + +use std::io::Write; +use std::path::{Path, PathBuf}; + +use camino::{Utf8Path, Utf8PathBuf}; +use vw_htcl::{LineIndex, Resolver}; + +use crate::session::{Session, SessionBatch}; + +struct NoopObserver; +impl vw_htcl::LoadObserver for NoopObserver {} + +#[derive(Debug, thiserror::Error)] +pub enum LowerError { + #[error("writing scratch input file: {0}")] + Io(#[from] std::io::Error), + #[error("loading htcl: {0}")] + Load(#[from] vw_htcl::LoadError), + #[error("{0}")] + Parse(String), +} + +/// Where in the loaded htcl tree a particular command came from. +/// Drives the error renderer in the App. +#[derive(Clone, Debug)] +pub struct Origin { + /// `.htcl` file the command was declared in, when known. `None` + /// only when the input itself wasn't backed by a real file + /// (e.g. interactive REPL input lowered before any imports). + pub file: Option, + /// 1-based line number in `file` (or in the input buffer when + /// `file` is `None`). 
+ pub line: u32, + /// First line of the command as written by the user — used as + /// the "what was running" line in the error renderer. + pub snippet: String, + /// The chain of `src` imports that brought this command's file + /// into scope, ordered nearest-first (so the last frame is the + /// entry file / user input). Empty when the command lives + /// directly in the entry, since there's nothing to chain. + pub via: Vec, +} + +/// One frame in the `via` chain: a `src` statement in some +/// importing file, captured as that importer's path, the line the +/// `src` lives on, and the snippet of that line (so the user sees +/// `src ip/cips` and not just `src`). +#[derive(Clone, Debug)] +pub struct OriginFrame { + pub file: Option, + pub line: u32, + pub snippet: String, +} + +#[derive(Clone, Debug)] +pub struct PreparedCommand { + pub tcl: String, + pub origin: Origin, + /// Declared return type of the expression this command + /// evaluates, when knowable from static analysis. `None` for + /// expressions whose head we couldn't resolve to a known proc + /// (untyped calls, control flow, raw Tcl, etc.). The App uses + /// this to suppress the Result push entirely on `unit` and to + /// skip the heuristic fallback formatter on every other typed + /// case (since the wrapped `tcl` already returns a formatted + /// string from the type's `repr` proc). + pub expected_return_type: Option, +} + +#[derive(Debug)] +pub struct Prepared { + /// Each top-level statement in the loaded program, in source + /// order. The worker fires `eval` once per item and stops at + /// the first failure. + pub commands: Vec, + /// The parsed program + proc map for this batch. Stays out of + /// the session document until every command in [`commands`] + /// succeeds — at which point the App calls + /// [`Session::commit`](crate::session::Session::commit) to + /// fold it into the running session. 
On failure the batch is + /// dropped, which is what keeps a half-applied state from + /// polluting the analyzer. + pub batch: SessionBatch, + /// Pre-flight findings worth surfacing to the user *before* we + /// ship anything to Vivado. The most common one is "this call + /// uses `-flag` keyword args but the proc isn't a loaded htcl + /// wrapper" — Vivado's underlying builtin almost always parses + /// the arguments differently, and the resulting error message + /// makes no sense without that context. + pub warnings: Vec, + /// Top-level statements that lived directly in the entry file + /// (the user's `--load` target, or the typed REPL input), + /// regardless of whether they lowered to any Tcl. Captured so + /// the `--load` echo path can show `src` directives next to + /// the calls that produce Tcl — without this, `src @vivado-cmd` + /// would never get its `›` echo because its lowering is empty + /// (consumed at load time by the loader). + pub entry_top_level: Vec, +} + +#[derive(Clone, Debug)] +pub struct PrepareWarning { + pub origin: Origin, + pub message: String, +} + +/// Where a proc's body lives in htcl source. `body_start_line` is +/// the 1-based absolute line of the first body line in `file`; line +/// N of the proc's body is `body_start_line + N - 1` in `file` and +/// `body_lines[N - 1]` carries that line's text. +#[derive(Clone, Debug)] +pub struct ProcLocation { + pub file: Option, + pub body_start_line: u32, + pub body_lines: Vec, +} + +impl ProcLocation { + /// Resolve a 1-based body line into a renderable + /// (absolute_line, content) pair. Returns `None` when the + /// reported line is past the end of the body — happens when + /// Tcl points at a line we can't account for (synthesized + /// content, off-by-one in some wrapper, etc.); the caller + /// gracefully skips the frame. + pub fn resolve_body_line(&self, n: u32) -> Option<(u32, String)> { + let idx = n.checked_sub(1)? 
as usize; + let content = self.body_lines.get(idx).cloned()?; + Some((self.body_start_line + idx as u32, content)) + } +} + +pub fn prepare( + input: &str, + cwd: &Path, + session: &Session, +) -> Result { + let mut noop = NoopObserver; + prepare_with_observer(input, cwd, session, &mut noop) +} + +/// Same as [`prepare`], with an extra hook the loader fires per +/// parsed file. Used by the perf regression test to assert that a +/// new batch only parses its own content (plus any transitive +/// `src` imports), never the entire prior-session prelude. +pub fn prepare_with_observer( + input: &str, + cwd: &Path, + session: &Session, + observer: &mut dyn vw_htcl::LoadObserver, +) -> Result { + let workspace_dir = find_workspace_dir(cwd); + let resolver = build_resolver(workspace_dir.as_deref()); + + let scratch_dir = workspace_dir + .as_deref() + .map(Utf8Path::as_std_path) + .unwrap_or(cwd); + + // The scratch contains ONLY the new input — never a prepended + // prelude. Prior batches contribute parsed signatures and proc + // locations directly via `session`, so we never re-parse the + // entire session on each keystroke. This is what keeps the + // REPL responsive after several `src @lib` imports have built + // up hundreds of thousands of lines of wrapper declarations. + let scratch = ScratchFile::new(scratch_dir, input)?; + + let program = vw_htcl::load_program_with_observer( + &scratch.path, + &resolver, + observer, + )?; + let parsed = vw_htcl::parse(&program.source); + + if let Some(err) = parsed.errors.first() { + let idx = LineIndex::new(&program.source); + let (start, _) = idx.range(err.span); + let where_ = + render_location(&program, err.span, start.line + 1, &scratch.path); + return Err(LowerError::Parse(format!("{where_}: {}", err.message))); + } + + // Validator runs first so unknown-keyword-call errors land + // before we ship anything. Prior-batch signatures are merged + // in so calls to wrappers from earlier inputs resolve. 
These + // are hard errors (not pre-flight warnings); routing them back + // as `LowerError` keeps the App's existing error-rendering + // path unchanged. + let prior_sigs = session.signature_table(); + let validator_diags = vw_htcl::validate_with_signatures( + &parsed.document, + &program.source, + &prior_sigs, + ); + if let Some(first_err) = validator_diags + .iter() + .find(|d| matches!(d.severity, vw_htcl::Severity::Error)) + { + let idx = LineIndex::new(&program.source); + let (start, _) = idx.range(first_err.span); + let where_ = render_location( + &program, + first_err.span, + start.line + 1, + &scratch.path, + ); + return Err(LowerError::Parse(format!( + "{where_}: {}", + first_err.message + ))); + } + + // Build the lowering table by merging prior-batch signatures + // with the new doc's own. The new doc's entries shadow prior + // ones (Tcl's "second `proc` redefines" semantics) — done by + // starting from the prior table and `extend`-ing with the new + // doc's table, since `extend` overwrites on key collision. + let mut table = prior_sigs; + table.extend(vw_htcl::signature_table(&parsed.document)); + let line_index = LineIndex::new(&program.source); + // Parse the *raw* input (not `program.source`) to capture every + // top-level statement as the user wrote it, including `src` + // directives. The loader rewrites `src` into the imported file's + // content before parsing `program.source`, so the loader-expanded + // document no longer contains a Stmt::Command for `src @foo`. + // We need that statement to drive the `--load` echo path. 
+ let entry_top_level: Vec = { + let entry_parsed = vw_htcl::parse(input); + let entry_idx = LineIndex::new(input); + let mut out = Vec::new(); + for stmt in &entry_parsed.document.stmts { + let vw_htcl::Stmt::Command(cmd) = stmt else { + continue; + }; + let (line, _) = entry_idx.range(cmd.span); + let snippet = input[cmd.span.start as usize..cmd.span.end as usize] + .trim_end() + .to_string(); + out.push(Origin { + file: None, + line: line.line + 1, + snippet, + via: Vec::new(), + }); + } + out + }; + + let mut commands = Vec::new(); + let mut extern_names: std::collections::BTreeSet = + std::collections::BTreeSet::new(); + + // Auto-emit machinery for enums + overload dispatchers. Both + // ship as synthetic PreparedCommand entries up front so the + // user's statements (which may construct enum values or call + // overloaded procs) find the supporting Tcl already in scope. + // The classification + overload-table build also re-runs the + // multi-decl signature collection — diagnostics from THAT pass + // already fired through the validator above, so we discard + // them here. + let mut _ignored_diags = Vec::new(); + let enum_decl_table = + vw_htcl::build_enum_decl_table(&parsed.document, &mut _ignored_diags); + // Merge prior-batch type declarations so wrap_with_repr can + // see newtypes declared in earlier `src @lib` batches (e.g. + // `type Properties = dict` from + // @vivado-cmd, when the user types + // `util::props -object $cips` at a later REPL prompt). + // Without this merge, the wrap can't recurse into + // Properties's underlying to ship + // `dict_string_Property::repr`, and the user's + // `Properties::repr` body fails with `invalid command + // name`. 
+ let mut type_decl_table = session.type_decl_table(); + let batch_type_decls = + vw_htcl::build_type_decl_table(&parsed.document, &mut _ignored_diags); + for (name, td) in batch_type_decls { + type_decl_table.insert(name, td); + } + let (_full_sig_table, overload_table) = + vw_htcl::build_signature_table_with_overloads( + &parsed.document, + &mut _ignored_diags, + ); + for ed in enum_decl_table.values() { + let prelude = vw_htcl::emit_enum_prelude(ed); + if prelude.is_empty() { + continue; + } + commands.push(PreparedCommand { + tcl: prelude, + origin: Origin { + file: None, + line: 0, + snippet: format!( + "", + ed.name.as_deref().unwrap_or("?") + ), + via: Vec::new(), + }, + expected_return_type: None, + }); + } + for info in overload_table.values() { + let dispatcher = vw_htcl::emit_dispatcher(info); + commands.push(PreparedCommand { + tcl: dispatcher, + origin: Origin { + file: None, + line: 0, + snippet: format!("", info.public_name), + via: Vec::new(), + }, + expected_return_type: None, + }); + } + + // Eagerly emit the primitive repr prelude (string/int/bool/ + // unit) so user procs that call e.g. `extern::string::repr` + // from inside their bodies see those procs in scope. Without + // this, the primitives are only emitted by `wrap_with_repr` + // at top-level REPL eval sites, leaving inner uses dead. + for proc in vw_htcl::repr::emit_primitive_prelude() { + commands.push(PreparedCommand { + tcl: proc, + origin: Origin { + file: None, + line: 0, + snippet: "".into(), + via: Vec::new(), + }, + expected_return_type: None, + }); + } + + // Eagerly emit monomorphized generic reprs for every declared + // type alias whose underlying is a generic + // (`dict<…>` / `list<…>`). Without this, a user-written + // `T::repr` body that delegates to the compiler-synthesized + // monomorphized name (e.g. 
`Properties::repr` calling + // `extern::dict_string_Property::repr`) errors at runtime + // when invoked from inside a proc body — `wrap_with_repr` + // only emits the monomorphization chain at top-level REPL + // eval sites, not for inner uses. By emitting here, the + // procs are in scope everywhere within the session. + // + // Dedup-by-text within the batch prevents shipping the same + // monomorphization more than once when two type aliases + // resolve to the same underlying generic. + let mut emitted_mono_reprs: std::collections::HashSet = + std::collections::HashSet::new(); + for td in type_decl_table.values() { + let Some(underlying) = td.underlying.as_ref() else { + continue; + }; + if !matches!(underlying, vw_htcl::TypeExpr::Generic { .. }) { + continue; + } + let emission = + vw_htcl::repr::emit_repr_with_types(underlying, &type_decl_table); + for proc in emission.procs { + if !emitted_mono_reprs.insert(proc.clone()) { + continue; + } + commands.push(PreparedCommand { + tcl: proc, + origin: Origin { + file: None, + line: 0, + snippet: format!( + "", + td.name.as_deref().unwrap_or("?") + ), + via: Vec::new(), + }, + expected_return_type: None, + }); + } + } + + for stmt in &parsed.document.stmts { + let vw_htcl::Stmt::Command(cmd) = stmt else { + continue; + }; + let (line_one_based, _) = line_index.range(cmd.span); + let origin = build_origin( + &program, + cmd.span, + line_one_based.line + 1, + &scratch.path, + ); + // If this command is a proc that's been classified as an + // overload specialization, lower it under its mangled name + // so the dispatcher (shipped above) can find it. Otherwise + // take the normal path. 
+ let lowered_raw = + match overload_specialization_mangle(cmd, &overload_table) { + Some(mangled) => { + let vw_htcl::CommandKind::Proc(proc) = &cmd.kind else { + unreachable!() + }; + vw_htcl::lower_proc_decl_with_name( + proc, + &program.source, + &table, + Some(&mangled), + ) + } + None => vw_htcl::lower_command(cmd, &program.source, &table), + }; + let rewritten = vw_htcl::rewrite_externs(&lowered_raw); + for name in rewritten.names { + extern_names.insert(name); + } + if rewritten.text.trim().is_empty() { + continue; + } + // Resolve the command's expected return type and, if any, + // wrap the lowered Tcl so it dispatches through the type's + // `repr` proc. The wrapped form runs the user's expression + // into a sentinel local then formats via the repr; the + // sentinel-binding step preserves `set var [...]`-style + // bindings (the user's `$var` still gets the raw value). + let expected_return_type = resolve_return_type(cmd, &table); + let final_tcl = match expected_return_type.as_ref() { + Some(ty) => wrap_with_repr(&rewritten.text, ty, &type_decl_table), + None => rewritten.text, + }; + commands.push(PreparedCommand { + tcl: final_tcl, + origin, + expected_return_type, + }); + } + + // No prelude needed in the current architecture: wrappers + // live in the `vivado::` namespace and `extern::name` rewrites + // to `::name`, which Tcl resolves at the global root regardless + // of the calling namespace. We still drain `extern_names` so + // the analyzer can grow future per-extern bookkeeping without + // rewiring this path. + let _ = extern_names; + + let procs = build_proc_locations(&parsed.document, &program, &scratch.path); + // The dedicated pre-flight `collect_warnings` is gone — the + // validator now treats "unknown call with `-flag` args" as a + // hard error and the REPL has already returned via `LowerError` + // above when one fires. 
+ let warnings: Vec = Vec::new(); + + Ok(Prepared { + commands, + batch: SessionBatch { + program, + document: parsed.document, + procs, + }, + warnings, + entry_top_level, + }) +} + +/// Walk every proc declaration (top-level + nested inside +/// `namespace eval` blocks) and record its body's source location +/// keyed by the proc's qualified name. Same recursion shape as +/// `vw_htcl::validate::collect_signatures` — kept in sync by +/// convention rather than refactor so this crate stays a leaf +/// consumer of vw-htcl. +pub fn build_proc_locations( + doc: &vw_htcl::Document, + program: &vw_htcl::LoadedProgram, + scratch_path: &Path, +) -> std::collections::HashMap { + use std::collections::HashMap; + let mut out: HashMap = HashMap::new(); + collect_procs(&doc.stmts, "", program, scratch_path, &mut out); + out +} + +fn collect_procs( + stmts: &[vw_htcl::Stmt], + prefix: &str, + program: &vw_htcl::LoadedProgram, + scratch_path: &Path, + out: &mut std::collections::HashMap, +) { + use vw_htcl::CommandKind; + for stmt in stmts { + let vw_htcl::Stmt::Command(cmd) = stmt else { + continue; + }; + match &cmd.kind { + CommandKind::Proc(proc) => { + let Some(name) = proc.name.as_deref() else { + continue; + }; + let qualified = qualify(prefix, name); + if let Some(loc) = + proc_body_location(program, proc.body_span, scratch_path) + { + out.insert(qualified, loc); + } + } + CommandKind::NamespaceEval(ns) => { + let Some(name) = ns.name.as_deref() else { + continue; + }; + let nested = qualify(prefix, name); + collect_procs(&ns.body, &nested, program, scratch_path, out); + } + _ => {} + } + } +} + +fn qualify(prefix: &str, name: &str) -> String { + if prefix.is_empty() { + name.to_string() + } else { + format!("{prefix}::{name}") + } +} + +fn proc_body_location( + program: &vw_htcl::LoadedProgram, + body_span: vw_htcl::Span, + scratch_path: &Path, +) -> Option { + let (file_index, file_span) = program.locate_span(body_span)?; + let file = &program.files[file_index]; + let 
file_path = if file.path == scratch_path { + None + } else { + Some(file.path.clone()) + }; + // Tcl's `(procedure "X" line N)` counts the line **containing + // the opening `{`** as line 1, the next line as line 2, etc. + // `body_span.start` is the byte right after the `{`, so the + // `{` itself sits at `file_span.start - 1`. The line at that + // byte is what Tcl calls "line 1." When the proc body is on a + // single line (`proc f {x} {puts $x}`) that line is also the + // content line. + let brace_pos = file_span.start.saturating_sub(1); + let body_start_line = file_line_at(&file.source, brace_pos); + // For the body_lines vector we want every file line from the + // one with the `{` up to (and including) the one with the + // matching `}` — so `resolve_body_line(N)` returns the + // corresponding source. Anything past the body is irrelevant. + let body_end_line = + file_line_at(&file.source, file_span.end.saturating_sub(1)); + let body_lines: Vec = file + .source + .lines() + .skip(body_start_line.saturating_sub(1) as usize) + .take((body_end_line - body_start_line + 1) as usize) + .map(str::to_string) + .collect(); + Some(ProcLocation { + file: file_path, + body_start_line, + body_lines, + }) +} + +/// Return the declared return type of `cmd`'s head call, when we +/// can resolve it from the signature table. Currently handles two +/// shapes: +/// +/// - Direct call: `proc-name arg arg …` → look up `proc-name`'s +/// return type in the table. +/// - Bracket-bound assignment: `set var [proc-name …]` → look up +/// the inner bracketed call's return type (since `set` returns +/// the value being set, which is the type of the inner call). +/// +/// Anything else (control flow, variable substitution, raw Tcl, +/// unknown commands) returns `None`. The App falls back to the +/// untyped-display path for those. 
+fn resolve_return_type( + cmd: &vw_htcl::ast::Command, + table: &std::collections::HashMap, +) -> Option { + let head = cmd.words.first()?.as_text()?; + if head == "set" { + // `set var [EXPR]` → recurse into the bracketed + // expression on the third word (words[2]). Other `set` + // shapes (set var literal, set var $other) leave the + // type unknown — we'd need real expression type-inference + // to do better, and that's out of scope for v1. + let val_word = cmd.words.get(2)?; + // Look for a CmdSubst part — `[…]` — at the top of the + // value word. If found, recurse into the bracketed + // command's first statement. + for part in &val_word.parts { + if let vw_htcl::WordPart::CmdSubst { body, .. } = part { + let vw_htcl::Stmt::Command(inner) = body.first()? else { + continue; + }; + return resolve_return_type(inner, table); + } + } + return None; + } + let sig = table.get(head)?; + sig.return_type.clone() +} + +/// Wrap the lowered Tcl `inner` so that, after evaluating it, the +/// result is fed through `::repr` (or the appropriate +/// monomorphized generic repr) to produce a display string. +/// +/// Prepends: +/// 1. The primitive prelude (`string` / `int` / `bool` / `unit` +/// triplets) — cheap to redefine per-eval; Tcl `proc` +/// redefinition is idempotent. +/// 2. Any per-instantiation generic reprs needed for `ty`, in +/// topological order so each proc is defined before its +/// dependents call it. +/// 3. `set __vw_result []` — captures the user expression's +/// raw value into a sentinel local. This preserves any +/// `set var [...]` bindings the user wrote, since `set`'s +/// side effect runs before our sentinel-capture wraps it. +/// 4. ` $__vw_result` — calls the type's repr proc on +/// the captured value. The eval returns this formatted string. 
+fn wrap_with_repr( + inner: &str, + ty: &vw_htcl::TypeExpr, + types: &std::collections::HashMap, +) -> String { + use std::fmt::Write; + let mut out = String::new(); + for p in vw_htcl::repr::emit_primitive_prelude() { + out.push_str(&p); + } + // Walks the dispatch type's underlying when `ty` is a newtype + // — necessary for `Properties` (newtype wrapping + // `dict`) so the body of `Properties::repr` + // can call the monomorphized `dict_string_Property::repr`. + let emission = vw_htcl::repr::emit_repr_with_types(ty, types); + for p in &emission.procs { + out.push_str(p); + } + writeln!(out, "set __vw_result [{}]", inner.trim_end()) + .expect("writeln to String never fails"); + // All reprs (compiler-emitted primitives + generics + user- + // written newtype reprs + auto-generated enum reprs) share a + // single `{args}` envelope that uses `::vw::kwargs` to bind + // `$v`. The dispatch site always calls them as + // ` -v ` so the kwargs envelope binds + // uniformly regardless of which class of repr is being + // invoked. + write!(out, "{} -v $__vw_result", emission.dispatch) + .expect("write to String never fails"); + out +} + +/// If `cmd` is a top-level `proc` whose name appears in the +/// overload table AND whose first arg is a qualified-variant +/// annotation, return the mangled internal name that this +/// specialization should lower under. Otherwise `None`. +/// +/// This is what reroutes user-written `proc handle_prop {v: +/// Property::Scalar} { … }` from emitting under the literal +/// `handle_prop` name (which would collide with the synthesized +/// dispatcher) to emitting under `__handle_prop__Scalar` (which +/// the dispatcher's switch arm calls). 
+fn overload_specialization_mangle( + cmd: &vw_htcl::Command, + overloads: &vw_htcl::OverloadTable, +) -> Option { + let vw_htcl::CommandKind::Proc(proc) = &cmd.kind else { + return None; + }; + let name = proc.name.as_deref()?; + if !overloads.contains_key(name) { + return None; + } + let sig = proc.signature.as_ref()?; + let first = sig.args.first()?; + let vw_htcl::TypeExpr::Qualified { variant, .. } = + first.type_annotation.as_ref()? + else { + return None; + }; + Some(vw_htcl::mangle_specialization(name, variant)) +} + +fn build_origin( + program: &vw_htcl::LoadedProgram, + span: vw_htcl::Span, + flat_line: u32, + scratch_path: &Path, +) -> Origin { + // Full span — for a multi-line `set proj [ … ]` the snippet + // includes every line of the command so the trace shows what + // the user actually wrote, not just `set proj [`. The renderer + // is responsible for indenting continuation lines. + let snippet = program.source[span.start as usize..span.end as usize] + .trim_end() + .to_string(); + + if let Some((file_index, file_span)) = program.locate_span(span) { + let file = &program.files[file_index]; + let file_path = if file.path == scratch_path { + None + } else { + Some(file.path.clone()) + }; + let file_line = file_line_at(&file.source, file_span.start); + let via = build_via_chain(program, file_index, scratch_path); + return Origin { + file: file_path, + line: file_line, + snippet, + via, + }; + } + Origin { + file: None, + line: flat_line, + snippet, + via: Vec::new(), + } +} + +/// Walk the loader's import chain from the leaf file back toward +/// the entry, turning each [`vw_htcl::ImportEdge`] into a renderable +/// frame. Nearest first. 
+fn build_via_chain( + program: &vw_htcl::LoadedProgram, + leaf_file: usize, + scratch_path: &Path, +) -> Vec { + program + .ancestry(leaf_file) + .map(|edge| { + let importer = &program.files[edge.importer_file]; + let line = file_line_at(&importer.source, edge.src_span.start); + let snippet = first_line( + &importer.source, + edge.src_span.start as usize, + edge.src_span.end as usize, + ); + OriginFrame { + file: if importer.path == scratch_path { + None + } else { + Some(importer.path.clone()) + }, + line, + snippet, + } + }) + .collect() +} + +fn first_line(source: &str, start: usize, end: usize) -> String { + let line_end = source[start..].find('\n').map(|n| start + n).unwrap_or(end); + source[start..line_end].trim().to_string() +} + +fn file_line_at(source: &str, offset: u32) -> u32 { + let upto = offset.min(source.len() as u32) as usize; + 1 + source[..upto].bytes().filter(|b| *b == b'\n').count() as u32 +} + +fn render_location( + program: &vw_htcl::LoadedProgram, + span: vw_htcl::Span, + flat_line: u32, + scratch_path: &Path, +) -> String { + if let Some((file_index, file_span)) = program.locate_span(span) { + let file = &program.files[file_index]; + if file.path != scratch_path { + let line = file_line_at(&file.source, file_span.start); + return format!("{}:{line}", file.path.display()); + } + } + format!("(input):{flat_line}") +} + +fn find_workspace_dir(start: &Path) -> Option { + let mut cur = Utf8PathBuf::from_path_buf(start.to_path_buf()).ok()?; + loop { + if cur.join("vw.toml").exists() { + return Some(cur); + } + let parent = cur.parent()?.to_path_buf(); + if parent == cur { + return None; + } + cur = parent; + } +} + +fn build_resolver(workspace_dir: Option<&Utf8Path>) -> Resolver { + let mut resolver = Resolver::new(); + let Some(ws) = workspace_dir else { + return resolver; + }; + if let Ok(paths) = vw_lib::transitive_dep_cache_paths(ws) { + for (name, path) in paths { + resolver = resolver.with_dep(name, path); + } + } + resolver +} + +struct 
ScratchFile { + path: PathBuf, +} + +impl ScratchFile { + fn new(dir: &Path, contents: &str) -> std::io::Result { + let name = format!(".vw-repl-input-{}.htcl", std::process::id()); + let path = dir.join(name); + let mut f = std::fs::File::create(&path)?; + f.write_all(contents.as_bytes())?; + Ok(Self { path }) + } +} + +impl Drop for ScratchFile { + fn drop(&mut self) { + let _ = std::fs::remove_file(&self.path); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn empty_session() -> Session { + Session::new() + } + + /// User-statement commands only — strips the synthetic + /// prelude entries (enum reprs, overload dispatchers, + /// primitive reprs, monomorphized generic reprs) the + /// preparer ships before each batch. Tests that assert + /// command count / shape only care about what the user + /// wrote, not the prelude scaffolding. + fn user_commands(prep: &Prepared) -> Vec<&PreparedCommand> { + prep.commands + .iter() + .filter(|c| !c.origin.snippet.starts_with('<')) + .collect() + } + + #[test] + fn unknown_keyword_call_inside_bracket_errors() { + // Mirrors the metroid project.htcl shape: a call to an + // unknown proc with keyword args, nested inside a `[ … ]` + // substitution. The validator now treats this as a hard + // error so the lowering returns `Err` and nothing ships + // to Vivado — the user is forced to either `src` a + // wrapper module or write `extern::create_project`. + let dir = tempfile::tempdir().unwrap(); + let err = prepare( + "set proj [\n create_project\n -in_memory 1\n -name foo\n]\n", + dir.path(), + &empty_session(), + ) + .unwrap_err(); + let msg = format!("{err}"); + assert!(msg.contains("create_project"), "{msg}"); + assert!(msg.contains("extern::"), "{msg}"); + } + + #[test] + fn extern_prefixed_call_is_accepted() { + // The opt-out: `extern::create_project` is explicitly a + // raw Tcl call, no wrapper required. 
Lowering strips the + // prefix so the bare native resolves through Tcl's global + // namespace at runtime — no rename plumbing, no prelude. + let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "extern::create_project -name foo\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + let cmds = user_commands(&prep); + assert_eq!(cmds.len(), 1, "{:?}", cmds); + assert!( + cmds[0].tcl.contains("create_project -name foo"), + "{}", + cmds[0].tcl + ); + assert!(!cmds[0].tcl.contains("extern::"), "{}", cmds[0].tcl); + } + + #[test] + fn prior_batch_procs_resolve_in_next_batch() { + // Reproduces the REPL "src @lib then call" pattern: a + // wrapper declared in a previous batch should be visible + // to the analyzer/lowering when we lower a bare call in + // the next batch — and the new batch should ship only + // its own statement (not re-emit the wrapper). + let dir = tempfile::tempdir().unwrap(); + let mut session = Session::new(); + // Batch 1: declare the wrapper. Commit so it joins the + // session — same flow the App follows on successful eval. + let first = prepare( + "namespace eval vivado {\n \ + proc current_project {\n \ + @enum(0, 1) @default(0) quiet\n \ + @enum(0, 1) @default(0) verbose\n \ + @default(\"\") project\n \ + } {\n \ + set cmd [list ::current_project]\n \ + return [{*}$cmd]\n \ + }\n\ + }\n", + dir.path(), + &session, + ) + .unwrap(); + // The first batch ships its own declaration to the worker + // exactly once — that's what makes the wrapper exist in + // Tcl. Subsequent batches must NOT re-emit it. + assert!( + first + .commands + .iter() + .any(|c| c.tcl.contains("namespace eval")), + "first batch must ship the namespace decl: {:?}", + first.commands + ); + session.commit(first.batch); + + // Batch 2: bare call to the wrapper. 
Should ship as-is + // (htcl is keyword-only at the call site; the wrapper + // parses its own kwargs at runtime via the ::vw::kwargs + // prelude), with no rewriting and no re-emission of the + // prior batch's declaration. + let prep = + prepare("vivado::current_project\n", dir.path(), &session).unwrap(); + let cmds = user_commands(&prep); + assert_eq!(cmds.len(), 1, "{:?}", cmds); + assert!( + cmds[0].tcl.contains("vivado::current_project"), + "{}", + cmds[0].tcl + ); + // And nothing in the new batch's source mentions the + // wrapper body — we never re-parsed the prior batch. + assert!( + !prep.batch.program.source.contains("namespace eval vivado"), + "{}", + prep.batch.program.source + ); + } + + #[test] + fn known_keyword_call_is_not_errored() { + // When the called proc IS in scope, no error fires. + let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "proc create_project { @default(\"\") name } { }\n\ + set proj [ create_project -name foo ]\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + assert!(prep.warnings.is_empty(), "{:?}", prep.warnings); + } + + #[test] + fn lowers_plain_proc_call_to_tcl() { + let dir = tempfile::tempdir().unwrap(); + let prep = prepare("puts hello", dir.path(), &empty_session()).unwrap(); + let cmds = user_commands(&prep); + assert_eq!(cmds.len(), 1); + assert!(cmds[0].tcl.contains("puts hello")); + // Input is at line 1 of the buffer. 
+ assert_eq!(cmds[0].origin.line, 1); + assert!(cmds[0].origin.file.is_none()); + assert_eq!(cmds[0].origin.snippet, "puts hello"); + } + + #[test] + fn each_statement_gets_its_own_origin() { + let dir = tempfile::tempdir().unwrap(); + let prep = + prepare("set x 1\nset y 2\nset z 3", dir.path(), &empty_session()) + .unwrap(); + let cmds = user_commands(&prep); + assert_eq!(cmds.len(), 3); + assert_eq!(cmds[0].origin.line, 1); + assert_eq!(cmds[1].origin.line, 2); + assert_eq!(cmds[2].origin.line, 3); + } + + #[test] + fn proc_body_line_resolution_matches_tcl_line_counting() { + // Tcl counts the proc-body line **containing the opening + // `{`** as line 1 — so a `(procedure "ip::check" line 2)` + // frame should point at the first content line of the body, + // not the line after it. + let dir = tempfile::tempdir().unwrap(); + let dep = dir.path().join("dep"); + std::fs::create_dir_all(&dep).unwrap(); + // Lines 1-2: blank + the namespace header; line 3 has `{` + // (the proc body opener); content lives on lines 4+. + std::fs::write( + dep.join("module.htcl"), + "namespace eval foo {\n proc bar {} {\n puts hi\n error oh-no\n }\n}\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("vw.toml"), + format!( + "[workspace]\nname=\"t\"\nversion=\"0.1.0\"\n\n\ + [dependencies.dep]\npath = \"{}\"\n", + dep.display() + ), + ) + .unwrap(); + let prep = prepare("src @dep", dir.path(), &empty_session()).unwrap(); + let loc = prep + .batch + .procs + .get("foo::bar") + .expect("expected foo::bar in proc map"); + // The proc body opens on file line 2 (the `} {`-style line + // here is just `proc bar {} {`), so Tcl line 1 → line 2 of + // the file. + assert_eq!(loc.body_start_line, 2); + // Tcl line 2 → file line 3 → `puts hi`. + let (line, content) = loc.resolve_body_line(2).unwrap(); + assert_eq!(line, 3); + assert_eq!(content.trim(), "puts hi"); + // Tcl line 3 → file line 4 → `error oh-no`. 
+ let (line, content) = loc.resolve_body_line(3).unwrap(); + assert_eq!(line, 4); + assert_eq!(content.trim(), "error oh-no"); + } + + #[test] + fn origin_via_chain_walks_back_through_src_imports() { + // entry → mid → leaf, all via `src`. A command in `leaf` + // should carry a 2-frame via chain (mid → entry/input). + let dir = tempfile::tempdir().unwrap(); + let mid_dep = dir.path().join("mid_dep"); + let leaf_dep = dir.path().join("leaf_dep"); + std::fs::create_dir_all(&mid_dep).unwrap(); + std::fs::create_dir_all(&leaf_dep).unwrap(); + std::fs::write(leaf_dep.join("module.htcl"), "set leaf_var 1\n") + .unwrap(); + std::fs::write(mid_dep.join("module.htcl"), "src @leaf\n").unwrap(); + std::fs::write( + dir.path().join("vw.toml"), + format!( + "[workspace]\nname=\"t\"\nversion=\"0.1.0\"\n\n\ + [dependencies.mid]\npath = \"{}\"\n\ + [dependencies.leaf]\npath = \"{}\"\n", + mid_dep.display(), + leaf_dep.display() + ), + ) + .unwrap(); + + let prep = prepare("src @mid", dir.path(), &empty_session()).unwrap(); + let cmds = user_commands(&prep); + assert_eq!(cmds.len(), 1); + let origin = &cmds[0].origin; + // Leaf-most command lives in leaf_dep/module.htcl. + assert!( + origin + .file + .as_ref() + .unwrap() + .ends_with("leaf_dep/module.htcl"), + "{:?}", + origin.file + ); + // The via chain: leaf was imported by mid (line 1), and mid + // was imported by the entry input (line 1). + assert_eq!(origin.via.len(), 2, "{:?}", origin.via); + assert!(origin.via[0] + .file + .as_ref() + .unwrap() + .ends_with("mid_dep/module.htcl")); + assert_eq!(origin.via[0].snippet, "src @leaf"); + // The outermost frame is the user's input (file = None). 
+ assert!(origin.via[1].file.is_none(), "{:?}", origin.via[1].file); + assert_eq!(origin.via[1].snippet, "src @mid"); + } + + #[test] + fn src_imported_statements_resolve_to_imported_file() { + let dir = tempfile::tempdir().unwrap(); + let dep = dir.path().join("dep"); + std::fs::create_dir_all(&dep).unwrap(); + std::fs::write( + dep.join("module.htcl"), + "proc hello {} { puts world }\nhello\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("vw.toml"), + format!( + "[workspace]\nname=\"t\"\nversion=\"0.1.0\"\n\n\ + [dependencies.dep]\npath = \"{}\"\n", + dep.display() + ), + ) + .unwrap(); + + let prep = prepare("src @dep", dir.path(), &empty_session()).unwrap(); + // Two commands from the imported file: `proc hello` and the + // bare `hello` call. Both must carry the imported file's + // path as origin. + let cmds = user_commands(&prep); + assert_eq!(cmds.len(), 2); + for cmd in &cmds { + let file = cmd.origin.file.as_ref().expect("import has file"); + assert!(file.ends_with("dep/module.htcl"), "{:?}", file); + } + // Line numbers point into the imported file. + assert_eq!(cmds[0].origin.line, 1); + assert_eq!(cmds[1].origin.line, 2); + } + + #[test] + fn second_batch_parses_only_its_own_files() { + // Regression guard against the lag bug: after `src @dep` + // commits, a subsequent bare call must NOT cause the + // loader to re-parse the dep's files. We assert by hooking + // the loader's per-file observer and counting parses on + // each batch. 
+ let dir = tempfile::tempdir().unwrap(); + let dep = dir.path().join("dep"); + std::fs::create_dir_all(&dep).unwrap(); + std::fs::write( + dep.join("module.htcl"), + "namespace eval lib {\n \ + proc f { @default(0) x } { return $x }\n\ + }\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("vw.toml"), + format!( + "[workspace]\nname=\"t\"\nversion=\"0.1.0\"\n\n\ + [dependencies.dep]\npath = \"{}\"\n", + dep.display() + ), + ) + .unwrap(); + + #[derive(Default)] + struct Counter { + parsed: Vec, + } + impl vw_htcl::LoadObserver for Counter { + fn on_parsed(&mut self, file: &Path, _raw: Option<&str>) { + self.parsed.push(file.to_path_buf()); + } + } + + let mut session = Session::new(); + + // First batch: imports the dep. Two files parse — the + // entry scratch and the dep's module.htcl. + let mut counter = Counter::default(); + let first = prepare_with_observer( + "src @dep\n", + dir.path(), + &session, + &mut counter, + ) + .unwrap(); + assert_eq!( + counter.parsed.len(), + 2, + "first batch should parse entry + dep, got {:?}", + counter.parsed + ); + session.commit(first.batch); + + // Second batch: bare call to the wrapper. The prior + // batch's signatures are merged in via `session`, so the + // loader must NOT re-read the dep's file — only the new + // scratch parses. + let mut counter = Counter::default(); + let _second = prepare_with_observer( + "lib::f -x 1\n", + dir.path(), + &session, + &mut counter, + ) + .unwrap(); + assert_eq!( + counter.parsed.len(), + 1, + "second batch should parse only the new input, got {:?}", + counter.parsed + ); + // And the one file parsed is the scratch, not the dep. 
+ let only = &counter.parsed[0]; + assert!( + !only.starts_with(&dep), + "the dep's files must not be re-parsed on a fresh \ + batch: {:?}", + only + ); + } + + #[test] + fn prior_batch_proc_location_survives_for_drilldown() { + // The user-reported bug: `src @vivado-cmd` declares + // `vivado::create_bd_design` in batch A, then a later + // `vivado::create_bd_design -name metroid` fires in batch + // B and the Tcl error frame names that proc. The + // proc-location lookup must resolve to the REAL .htcl + // file the wrapper came from — not the disposable scratch + // path of either batch. + let dir = tempfile::tempdir().unwrap(); + let dep = dir.path().join("vivado_cmd"); + std::fs::create_dir_all(&dep).unwrap(); + std::fs::write( + dep.join("module.htcl"), + "namespace eval vivado {\n \ + proc create_bd_design {\n \ + @default(\"\") name\n \ + } {\n \ + set cmd [list ::create_bd_design]\n \ + return [{*}$cmd]\n \ + }\n\ + }\n", + ) + .unwrap(); + std::fs::write( + dir.path().join("vw.toml"), + format!( + "[workspace]\nname=\"t\"\nversion=\"0.1.0\"\n\n\ + [dependencies.vivado-cmd]\npath = \"{}\"\n", + dep.display() + ), + ) + .unwrap(); + + let mut session = Session::new(); + // Batch A: pull the wrapper in. + let first = prepare("src @vivado-cmd\n", dir.path(), &session).unwrap(); + session.commit(first.batch); + + // Batch B: call the wrapper. Look up its location through + // the session — which is exactly the path the App's error + // renderer takes when resolving a Tcl drill-down frame. + let _second = prepare( + "vivado::create_bd_design -name metroid\n", + dir.path(), + &session, + ) + .unwrap(); + let loc = session.lookup_proc("vivado::create_bd_design").expect( + "wrapper from a prior `src @vivado-cmd` batch must be \ + reachable through session.lookup_proc", + ); + // The crucial assertion: the file pointer is the REAL + // imported .htcl, not `None` (the scratch) and not some + // huge synthetic offset. 
+ let file = loc.file.as_ref().expect( + "wrapper from imported module must carry its real \ + .htcl path, not the disposable scratch", + ); + assert!( + file.ends_with("vivado_cmd/module.htcl"), + "expected the imported module path, got {:?}", + file + ); + // And `body_start_line` is the file-local line of the + // proc body opener — small, not a combined-scratch offset. + assert!( + loc.body_start_line < 100, + "body_start_line should be a small file-local number, \ + got {}", + loc.body_start_line + ); + } + + // --- typed-expression wrap (step 3) ---------------------------- + + #[test] + fn typed_proc_call_wraps_with_repr_dispatch() { + let dir = tempfile::tempdir().unwrap(); + // A proc annotated dict, called bare. The + // wrap should: + // - capture the call's result into __vw_result + // - invoke the monomorphized dict repr proc on it + // PreparedCommand.expected_return_type carries the type. + let prep = prepare( + "proc props {} dict { return {} }\n\ + props\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + // Two commands: proc decl (drops to empty Tcl) + call. + // proc decl ships as a regular command; the call should + // be wrapped. + let call = prep + .commands + .iter() + .find(|c| c.tcl.contains("__vw_result")) + .expect("expected the `props` call to be repr-wrapped"); + assert!( + call.tcl.contains("set __vw_result [props]"), + "tcl: {}", + call.tcl + ); + assert!( + call.tcl + .contains("dict_string_string::repr -v $__vw_result"), + "tcl: {}", + call.tcl + ); + // Primitive prelude is included so the dict repr's + // element calls (string::repr) resolve. Both the + // primitive procs and the monomorphized generic procs + // are wrapped in explicit `namespace eval` blocks so + // Tcl's namespace-conflict heuristic doesn't reject the + // declaration (the bare `proc string::repr` form trips + // over Tcl's built-in `string` command). 
+ assert!( + call.tcl.contains("namespace eval string"), + "expected primitive prelude in wrapped tcl: {}", + call.tcl + ); + // Plus the dict repr itself. + assert!( + call.tcl.contains("namespace eval dict_string_string"), + "expected monomorphized dict repr: {}", + call.tcl + ); + // The expected_return_type rides along for App-side use. + let ty = call + .expected_return_type + .as_ref() + .expect("expected_return_type set"); + match ty { + vw_htcl::TypeExpr::Generic { name, args, .. } => { + assert_eq!(name, "dict"); + assert_eq!(args.len(), 2); + } + _ => panic!("expected Generic, got {:?}", ty), + } + } + + #[test] + fn set_var_call_inherits_inner_return_type() { + // `set cips [props]` — `set` returns the value being set, + // so its type is whatever `props` returns. The wrap should + // bind `$cips` correctly AND dispatch on the inner call's + // declared type. + let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "proc props {} dict { return {} }\n\ + set x [props]\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + let set_cmd = prep + .commands + .iter() + .find(|c| c.tcl.contains("__vw_result")) + .expect("expected the `set x [...]` to be repr-wrapped"); + assert!( + set_cmd.tcl.contains("set __vw_result [set x [props]]"), + "expected the original set to be inner-wrapped: {}", + set_cmd.tcl + ); + assert!( + set_cmd + .tcl + .contains("dict_string_string::repr -v $__vw_result"), + "tcl: {}", + set_cmd.tcl + ); + } + + #[test] + fn unannotated_call_is_not_wrapped() { + // No return type → no wrap, no `__vw_result` capture, + // and `expected_return_type` is None. 
+ let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "proc plain {} { return whatever }\n\ + plain\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + let plain_call = prep + .commands + .iter() + .find(|c| c.tcl.trim() == "plain") + .expect("expected raw `plain` call without wrap"); + assert!(plain_call.expected_return_type.is_none()); + assert!( + !plain_call.tcl.contains("__vw_result"), + "unannotated calls shouldn't get the repr wrap: {}", + plain_call.tcl + ); + } + + #[test] + fn unit_typed_call_is_wrapped_with_unit_dispatch() { + // `unit`-typed expressions still get wrapped — the wrap + // returns the empty string from `unit::repr`. The App's + // EvalDone handler is what suppresses the Result push; + // the lowerer is uniform. + let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "proc do_thing {} unit { puts hi }\n\ + do_thing\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + let call = prep + .commands + .iter() + .find(|c| c.tcl.contains("__vw_result")) + .expect("expected repr-wrap on unit-typed call"); + assert!(call.tcl.contains("unit::repr -v $__vw_result")); + let ty = call.expected_return_type.as_ref().unwrap(); + match ty { + vw_htcl::TypeExpr::Named { name, .. } => { + assert_eq!(name, "unit"); + } + _ => panic!(), + } + } + + // --- enum / overload pipeline (step 5) ------------------------- + + #[test] + fn enum_decl_ships_namespace_eval_prelude() { + let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "enum Direction = {\n North\n South\n}\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + // The prelude is shipped as a synthetic PreparedCommand. 
+ let prelude = prep + .commands + .iter() + .find(|c| c.tcl.contains("namespace eval Direction")) + .expect("expected enum prelude in prepared commands"); + assert!(prelude.tcl.contains("proc North {}")); + assert!(prelude.tcl.contains("proc South {}")); + assert!(prelude.tcl.contains("proc tag {v}")); + assert!(prelude.tcl.contains("proc payload {v}")); + assert!(prelude.tcl.contains("proc repr {args}")); + } + + #[test] + fn overload_set_ships_dispatcher_and_mangled_specializations() { + let dir = tempfile::tempdir().unwrap(); + let prep = prepare( + "enum E = {\n A: string\n B: int\n}\n\ + proc f {v: E::A} string { return $v }\n\ + proc f {v: E::B} string { return $v }\n", + dir.path(), + &empty_session(), + ) + .unwrap(); + // Dispatcher emitted with the switch body. The dispatcher + // takes the standard kwargs envelope (`{args}`), walks + // kwargs for `-v `, then switches on the + // tag. + let dispatcher = prep + .commands + .iter() + .find(|c| { + c.tcl.contains("proc f {args}") + && c.tcl.contains("switch") + && c.tcl.contains("__f__") + }) + .expect("expected dispatcher for `f`"); + assert!(dispatcher.tcl.contains("__f__A")); + assert!(dispatcher.tcl.contains("__f__B")); + // Specializations emitted under mangled names — look for + // the `proc __f__A` declaration (rather than `proc f`). + assert!( + prep.commands + .iter() + .any(|c| c.tcl.contains("proc __f__A {args}")), + "expected specialization under __f__A: tcls={:?}", + prep.commands.iter().map(|c| &c.tcl).collect::>() + ); + assert!( + prep.commands + .iter() + .any(|c| c.tcl.contains("proc __f__B {args}")), + "expected specialization under __f__B" + ); + // The user-visible name `f` should NOT appear as a + // user-procedure declaration — only as the dispatcher. + // (The dispatcher's body has `proc f {v args}` which we + // already accounted for above; what we're guarding + // against is a leaked `proc f {args} { ::vw::kwargs ... }` + // specialization.) 
+ let leaked_f = prep + .commands + .iter() + .filter(|c| c.tcl.contains("proc f {args} { ::vw::kwargs")) + .count(); + assert_eq!( + leaked_f, 0, + "specialization should NOT have leaked under public name `f`" + ); + } + + #[test] + fn cross_batch_newtype_recursion_emits_generic_repr() { + // Reproduces the user-reported regression: batch 1 + // declares `type Properties = dict` and a + // proc returning Properties; batch 2 calls that proc. + // The wrap_with_repr in batch 2 should walk Properties's + // underlying (the dict generic) and emit + // `dict_string_string::repr` so the user's + // `Properties::repr` body can find it. + // + // Pre-fix: type_decl_table was per-batch, so batch 2 + // couldn't see Properties; the recursion didn't fire; + // dict_string_string::repr was never emitted; the + // user's body errored with `invalid command name`. + let dir = tempfile::tempdir().unwrap(); + let mut session = Session::new(); + let first = prepare( + "type Properties = {dict}\n\ + proc Properties::repr {v} { return $v }\n\ + proc Properties::from {v} { return $v }\n\ + proc Properties::to {v} { return $v }\n\ + proc get_props {} Properties { return {a 1 b 2} }\n", + dir.path(), + &session, + ) + .unwrap(); + session.commit(first.batch); + + // Batch 2: just the call. parsed.document doesn't have + // the type decl — it must come from `session`. + let second = prepare("get_props\n", dir.path(), &session).unwrap(); + let call = second + .commands + .iter() + .find(|c| c.tcl.contains("__vw_result")) + .expect("expected wrapped call to get_props"); + // The wrap must include the monomorphized dict repr + // (reached by recursing through Properties's underlying). + assert!( + call.tcl.contains("namespace eval dict_string_string"), + "expected dict_string_string::repr in wrap (newtype \ + recursion across batches): {}", + call.tcl + ); + // And the top-level dispatch goes through Properties::repr + // with the `-v` form, not positional. 
+ assert!( + call.tcl.contains("Properties::repr -v $__vw_result"), + "expected Properties::repr dispatch via -v form: {}", + call.tcl + ); + } +} diff --git a/vw-repl/src/render.rs b/vw-repl/src/render.rs new file mode 100644 index 0000000..918acad --- /dev/null +++ b/vw-repl/src/render.rs @@ -0,0 +1,366 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Scrollback rendering helpers shared between `ui::draw_scrollback` +//! and `App` mouse-selection. Both need the same view of "how does the +//! scrollback look on screen, row by row" — the UI to render it, the +//! App to map mouse clicks to text positions and extract the selected +//! substring on copy. +//! +//! The flow is: [`entry_lines`] turns each `ScrollbackEntry` into one +//! styled [`Line`] per source line; [`wrap_lines`] then breaks each of +//! those at the rendered column width into screen-row–sized chunks. +//! After wrapping, screen-row N is `wrapped[scroll + N]` — that 1:1 +//! mapping is what makes mouse-cell → text-cell trivial. With +//! ratatui's built-in `Wrap { trim: false }` we'd have to replay +//! ratatui's word-wrap to find the same mapping, which we don't want +//! to maintain in lockstep. + +use ratatui::style::{Color, Modifier, Style}; +use ratatui::text::{Line, Span}; + +use crate::app::{ScrollbackEntry, ScrollbackKind}; + +/// One styled [`Line`] per source line in `entry`. The leading +/// 2-cell column is the kind-prefix (`› `, `· `, `⚠ `, etc.) on the +/// first source line and two spaces on continuation lines, so a +/// multi-line entry visually hangs together. +/// +/// `area_width` is the terminal column count — used to +/// right-justify the per-input timer marker on the first line of +/// an `Input` entry. Pass the same width the renderer will wrap +/// to so the timer ends up flush at the right margin. 
+pub fn entry_lines( + entry: &ScrollbackEntry, + area_width: u16, +) -> Vec> { + let orange = Color::Rgb(255, 140, 0); + let (prefix, prefix_style) = match entry.kind { + ScrollbackKind::Input => ( + "› ", + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ), + ScrollbackKind::Result => (" ", Style::default().fg(Color::Gray)), + ScrollbackKind::Stdout => (" ", Style::default().fg(Color::White)), + ScrollbackKind::Error => ( + "✗ ", + Style::default().fg(Color::Red).add_modifier(Modifier::BOLD), + ), + ScrollbackKind::Warning => ( + "⚠ ", + Style::default().fg(orange).add_modifier(Modifier::BOLD), + ), + ScrollbackKind::Notice => ("· ", Style::default().fg(Color::DarkGray)), + }; + let body_style = match entry.kind { + ScrollbackKind::Input => Style::default().fg(Color::White), + ScrollbackKind::Result => Style::default().fg(Color::Gray), + ScrollbackKind::Stdout => Style::default().fg(Color::White), + ScrollbackKind::Error => Style::default().fg(Color::Red), + ScrollbackKind::Warning => Style::default().fg(orange), + ScrollbackKind::Notice => Style::default().fg(Color::DarkGray), + }; + // For Input entries with a timer, render `` flush + // right on the first line. Color follows whether it's still + // running (dim while live) vs. completed (subtle gray). + let timer = timer_for(entry); + // Highlighter is opt-in per kind: typed Result entries and + // captured Stdout lines both can carry compiler-emitted repr + // output (the auto-generated `::repr` shape with `KEY + // Variant(...)` and multi-line nested blocks). Try parsing + // each line as a repr; on match, emit per-token styled spans + // (key=blue, variant=teal, punct=dim, scalar=green). Lines + // that don't parse (raw `puts hi`, error continuations, etc.) + // fall back to the entry's body style. Input/Error/Warning/ + // Notice keep their single-color body rendering — those are + // not repr-formatted. 
+ let highlight = + matches!(entry.kind, ScrollbackKind::Result | ScrollbackKind::Stdout); + let mut out = Vec::new(); + for (i, line) in entry.text.lines().enumerate() { + let leading = if i == 0 { prefix } else { " " }; + let mut spans: Vec> = + vec![Span::styled(leading.to_string(), prefix_style)]; + if highlight { + if let Some(highlighted) = crate::highlight::highlight_line(line) { + spans.extend(highlighted); + } else { + spans.push(Span::styled(line.to_string(), body_style)); + } + } else { + spans.push(Span::styled(line.to_string(), body_style)); + } + if i == 0 { + if let Some((label, label_style)) = timer.as_ref() { + let used: usize = + spans.iter().map(|s| display_cells(&s.content)).sum(); + let label_w = display_cells(label); + if (used + label_w + 1) as u16 <= area_width { + let pad = area_width as usize - used - label_w; + spans.push(Span::raw(" ".repeat(pad))); + spans.push(Span::styled(label.clone(), *label_style)); + } + } + } + out.push(Line::from(spans)); + } + if out.is_empty() { + out.push(Line::from(vec![Span::styled( + prefix.to_string(), + prefix_style, + )])); + } + out +} + +/// `(label, style)` for an entry's elapsed-time marker, or `None` +/// when the entry isn't timed. Color hints whether the timer is +/// still live (running) or frozen (completed). +fn timer_for(entry: &ScrollbackEntry) -> Option<(String, Style)> { + let start = entry.started_at?; + let end = entry.completed_at.unwrap_or_else(std::time::Instant::now); + let elapsed = end.saturating_duration_since(start); + let label = format_duration(elapsed); + let style = if entry.completed_at.is_some() { + // Frozen at final value — quiet, post-fact. + Style::default().fg(Color::DarkGray) + } else { + // Live — slightly more present so the user sees it's + // still moving. + Style::default().fg(Color::Yellow) + }; + Some((label, style)) +} + +/// Format a duration as `Ns`, `M:SS`, or `H:MM:SS` depending on +/// magnitude. Always second-granularity; never fractional. 
Matches +/// what users expect for "how long did this take" markers. +pub fn format_duration(d: std::time::Duration) -> String { + let total = d.as_secs(); + if total < 60 { + format!("{total}s") + } else if total < 3600 { + format!("{}:{:02}", total / 60, total % 60) + } else { + let h = total / 3600; + let m = (total % 3600) / 60; + let s = total % 60; + format!("{h}:{m:02}:{s:02}") + } +} + +/// Crude width estimator — counts chars, treating each as one +/// terminal cell. Good enough for our prefix glyphs (`› ` etc., +/// each rendered as one cell in monospace terminals) and ASCII +/// timer labels. A full unicode-width crate would be more +/// correct but isn't worth the dep for the small set of +/// characters this code emits. +fn display_cells(s: &str) -> usize { + s.chars().count() +} + +/// Split each input line into screen-row-sized chunks of `width` +/// columns, preserving span styles across the split. The output +/// renders 1:1 against screen rows when fed to a `Paragraph` with no +/// further wrapping, so screen-row N is `out[scroll + N]`. +/// +/// Splitting is character-based (no word-boundary respect) — this is +/// REPL output, not prose; long Vivado property dicts and Tcl errors +/// don't have natural break points. +/// Cheap pre-computation of how many wrapped terminal rows an +/// entry will occupy at the given width — WITHOUT actually +/// allocating wrapped lines. O(text length) per entry, no heap +/// allocations beyond the iterator. +/// +/// Used by the viewport-slicing render path to find which +/// entries intersect the visible window in linear time, so the +/// expensive [`entry_lines`] + [`wrap_lines`] only runs on the +/// handful of entries actually in view. Without this, a huge +/// entry (e.g. the formatted `util::props` output) gets +/// fully re-wrapped on every draw — turning every wheel event +/// into multi-MB of per-char allocation. 
+/// +/// The count must match what [`entry_lines`] + [`wrap_lines`] +/// actually produce: each natural text line contributes +/// `ceil((prefix + body_chars) / width)` wrapped rows (min 1). +/// The Input-entry timer suffix is ignored — when it fits it +/// pads the first line to exactly `width` (still 1 row); when +/// it doesn't fit it isn't added (so the body wraps normally +/// without it). Either way the row count matches. +pub fn count_wrapped_rows(entry: &ScrollbackEntry, width: u16) -> u32 { + if width == 0 { + return 1; + } + let w = width as usize; + // Every entry kind gets a 2-cell prefix ("› ", " ", etc.). + let prefix_width = 2; + let mut rows: u32 = 0; + let mut had_lines = false; + for line in entry.text.lines() { + had_lines = true; + let body_chars = line.chars().count(); + let total = body_chars.saturating_add(prefix_width).max(1); + let line_rows = total.div_ceil(w).max(1); + rows = rows.saturating_add(line_rows as u32); + } + if !had_lines { + // Empty text → entry_lines emits one blank line. + rows = 1; + } + rows +} + +pub fn wrap_lines(input: Vec>, width: u16) -> Vec> { + if width == 0 { + return input; + } + let w = width as usize; + let mut out = Vec::with_capacity(input.len()); + for line in input { + // Flatten spans → (char, style) so chunking can ignore the + // span boundaries and only care about per-cell style. 
+ let mut chars: Vec<(char, Style)> = Vec::new(); + for span in &line.spans { + for c in span.content.chars() { + chars.push((c, span.style)); + } + } + if chars.is_empty() { + out.push(Line::from("")); + continue; + } + for chunk in chars.chunks(w) { + out.push(merge_to_line(chunk)); + } + } + out +} + +fn merge_to_line(chunk: &[(char, Style)]) -> Line<'static> { + let mut spans = Vec::new(); + let mut buf = String::new(); + let mut cur_style = chunk[0].1; + for (c, st) in chunk { + if *st != cur_style { + spans.push(Span::styled(std::mem::take(&mut buf), cur_style)); + cur_style = *st; + } + buf.push(*c); + } + if !buf.is_empty() { + spans.push(Span::styled(buf, cur_style)); + } + Line::from(spans) +} + +/// Plain-text content of a [`Line`] — span styles dropped, content +/// concatenated. Used to extract the selected substring for clipboard +/// copy. +pub fn line_plain_text(line: &Line<'_>) -> String { + let mut out = String::new(); + for span in &line.spans { + out.push_str(span.content.as_ref()); + } + out +} + +/// Re-style cells in `lines` that fall inside the selection range, +/// `[start, end)`. Both endpoints are `(row, col)` indices into +/// `lines` (the post-wrap, post-scroll Vec). The range may be +/// inverted (cursor before anchor); callers should normalize first. +pub fn apply_selection_highlight( + lines: &mut [Line<'static>], + start: (usize, usize), + end: (usize, usize), +) { + let (sr, sc) = start; + let (er, ec) = end; + for (row_idx, line) in lines.iter_mut().enumerate() { + if row_idx < sr || row_idx > er { + continue; + } + let row_start = if row_idx == sr { sc } else { 0 }; + let row_end = if row_idx == er { ec } else { usize::MAX }; + highlight_cols(line, row_start, row_end); + } +} + +fn highlight_cols(line: &mut Line<'static>, start: usize, end: usize) { + // Rebuild spans, splitting any that straddle the selection + // boundary so the REVERSED modifier applies to exactly the cells + // in [start, end). 
+ let mut new_spans: Vec> = Vec::new(); + let mut col = 0usize; + for span in line.spans.drain(..) { + let span_chars: Vec = span.content.chars().collect(); + let len = span_chars.len(); + let span_start = col; + let span_end = col + len; + col = span_end; + + if span_end <= start || span_start >= end { + // Wholly outside selection — push unchanged. + new_spans.push(span); + continue; + } + + // Compute the three potential sub-pieces [..lo, lo..hi, hi..] + // where lo, hi are local offsets within span_chars. + let lo = start.saturating_sub(span_start).min(len); + let hi = end.saturating_sub(span_start).min(len); + + if lo > 0 { + let s: String = span_chars[..lo].iter().collect(); + new_spans.push(Span::styled(s, span.style)); + } + if hi > lo { + let s: String = span_chars[lo..hi].iter().collect(); + new_spans.push(Span::styled( + s, + span.style.add_modifier(Modifier::REVERSED), + )); + } + if hi < len { + let s: String = span_chars[hi..].iter().collect(); + new_spans.push(Span::styled(s, span.style)); + } + } + line.spans = new_spans; +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + #[test] + fn duration_seconds_under_minute() { + assert_eq!(format_duration(Duration::from_secs(0)), "0s"); + assert_eq!(format_duration(Duration::from_secs(1)), "1s"); + assert_eq!(format_duration(Duration::from_secs(59)), "59s"); + } + + #[test] + fn duration_mss_minute_to_hour() { + assert_eq!(format_duration(Duration::from_secs(60)), "1:00"); + assert_eq!(format_duration(Duration::from_secs(75)), "1:15"); + assert_eq!(format_duration(Duration::from_secs(3599)), "59:59"); + } + + #[test] + fn duration_hmmss_hour_plus() { + assert_eq!(format_duration(Duration::from_secs(3600)), "1:00:00"); + assert_eq!(format_duration(Duration::from_secs(3661)), "1:01:01"); + assert_eq!(format_duration(Duration::from_secs(36_000)), "10:00:00"); + } + + #[test] + fn duration_truncates_subsecond() { + // 5.9s should render as "5s" — second granularity only, + // never 
fractional. + assert_eq!(format_duration(Duration::from_millis(5_900)), "5s"); + } +} diff --git a/vw-repl/src/session.rs b/vw-repl/src/session.rs new file mode 100644 index 0000000..3937cd2 --- /dev/null +++ b/vw-repl/src/session.rs @@ -0,0 +1,204 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! In-memory REPL session, held as parsed batches rather than a +//! re-concatenated text blob. +//! +//! Every successful input contributes one [`SessionBatch`] — the +//! loaded program (own source + import edges), its parsed +//! [`vw_htcl::Document`], and the map from proc-name to +//! [`ProcLocation`] for every proc the batch declared. Prior +//! batches are read by [`crate::lower::prepare`] when lowering the +//! next input: their signatures resolve unknown calls; their proc +//! locations let the error renderer translate Tcl's `(procedure +//! "X" line N)` frames back to the real `.htcl` file the wrapper +//! body was declared in. +//! +//! Why this shape (vs. the original text-blob prelude): +//! +//! 1. **Performance.** After a few `src @lib` imports the prelude +//! is hundreds of thousands of lines. Re-parsing and re-walking +//! it on every input is what made the REPL feel laggy. Storing +//! parsed state means each new input parses only its own +//! content + transitive imports — O(new), not O(total). +//! 2. **Error rendering.** A drill-down frame for a wrapper proc +//! declared in an earlier batch knows the real `.htcl` path it +//! came from, so `(procedure "vivado::create_bd_design" line +//! 2)` resolves to `vivado-cmd/bd.htcl:42` instead of +//! `(input):199185` of the giant combined scratch. 
+
+use std::collections::HashMap;
+
+use vw_htcl::{Document, LoadedProgram, ProcSignature, TypeDecl};
+
+use crate::lower::ProcLocation;
+
+// NOTE(review): in this copy of the file the type arguments of several
+// generic types (`HashMap`, `Vec`) appear to have been lost; confirm
+// the intended key/value types against the real source before relying
+// on the signatures below.
+
+/// One committed input: the parsed program it produced, plus the
+/// proc-location map the lowerer derived from it. Stored on a
+/// per-batch basis so signatures and proc lookups can fold across
+/// the whole session without ever re-parsing a prior batch.
+#[derive(Debug)]
+pub struct SessionBatch {
+    /// Loader output for this batch — file paths, import edges,
+    /// and the flattened source. Held alongside the parsed
+    /// document so future analyzer features (completion, goto-
+    /// def, hover) can walk back to per-file context without
+    /// re-running the loader. Spans inside [`document`] are
+    /// offsets into [`program.source`](LoadedProgram::source);
+    /// keeping the program alive keeps those spans meaningful.
+    #[allow(dead_code)]
+    pub program: LoadedProgram,
+    /// Parsed document for this batch; the per-batch signature and
+    /// type-declaration tables are built from it.
+    pub document: Document,
+    /// Proc-location entries for this batch, keyed by proc name;
+    /// consulted by [`Session::lookup_proc`].
+    pub procs: HashMap,
+}
+
+/// A live REPL session: every committed batch in order.
+#[derive(Debug, Default)]
+pub struct Session {
+    /// Committed batches, oldest first (`commit` pushes to the end).
+    batches: Vec,
+}
+
+impl Session {
+    /// Create an empty session with no committed batches.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Append a batch — called from the App after every successful
+    /// eval (including the pure-`src` no-Tcl-to-eval case, which
+    /// commits immediately because no eval can fail).
+    pub fn commit(&mut self, batch: SessionBatch) {
+        self.batches.push(batch);
+    }
+
+    /// Build a merged signature table covering every proc declared
+    /// in the session so far. Later batches shadow earlier ones,
+    /// matching Tcl's "second `proc` redefines" semantics. The
+    /// returned map borrows from `self`; held only for the duration
+    /// of the next batch's prepare() call.
+    pub fn signature_table(&self) -> HashMap {
+        let mut table: HashMap = HashMap::new();
+        for batch in &self.batches {
+            // Per-batch table merges into the running table; later
+            // batches' entries overwrite earlier ones via `insert`.
+            let batch_table = vw_htcl::signature_table(&batch.document);
+            for (name, sig) in batch_table {
+                table.insert(name, sig);
+            }
+        }
+        table
+    }
+
+    /// Same as [`signature_table`] but for `type NAME = …`
+    /// declarations. Needed when wrapping a typed expression's
+    /// result through its `repr` proc — the dispatch type may be
+    /// a newtype declared in a prior batch (e.g. `Properties`
+    /// from a sourced `@vivado-cmd` library), and the repr
+    /// codegen walks the underlying to emit the dependent generic
+    /// repr (`dict_string_Property::repr` in that case).
+    pub fn type_decl_table(&self) -> HashMap {
+        let mut table: HashMap = HashMap::new();
+        for batch in &self.batches {
+            // Diagnostics produced while building the per-batch table
+            // are collected into a throwaway Vec and dropped here.
+            let mut diags = Vec::new();
+            let batch_table =
+                vw_htcl::build_type_decl_table(&batch.document, &mut diags);
+            for (name, td) in batch_table {
+                table.insert(name, td);
+            }
+        }
+        table
+    }
+
+    /// Look up the most-recent proc location across every batch.
+    /// Returns `None` when no batch has declared that proc — the
+    /// error renderer's drill-down path silently skips such frames
+    /// (Tcl proc frames for builtins, dynamically-defined procs,
+    /// etc.).
+    pub fn lookup_proc(&self, name: &str) -> Option<&ProcLocation> {
+        // Walk newest-first so the latest definition wins.
+        for batch in self.batches.iter().rev() {
+            if let Some(loc) = batch.procs.get(name) {
+                return Some(loc);
+            }
+        }
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use vw_htcl::parse;
+
+    fn batch_from(source: &str) -> SessionBatch {
+        // Build a minimal in-memory LoadedProgram from a string for
+        // tests that don't care about the loader pipeline.
+        let parsed = parse(source);
+        assert!(parsed.errors.is_empty(), "{:?}", parsed.errors);
+        SessionBatch {
+            program: LoadedProgram {
+                source: source.to_string(),
+                files: Vec::new(),
+                regions: Vec::new(),
+            },
+            document: parsed.document,
+            procs: HashMap::new(),
+        }
+    }
+
+    #[test]
+    fn signature_table_folds_across_batches() {
+        let mut s = Session::new();
+        s.commit(batch_from("proc foo { x } { }\n"));
+        s.commit(batch_from("proc bar { y } { }\n"));
+        let table = s.signature_table();
+        assert!(table.contains_key("foo"));
+        assert!(table.contains_key("bar"));
+    }
+
+    #[test]
+    fn later_batch_shadows_earlier_signature() {
+        // Second `proc foo` redefines the first — the merged table
+        // returns the newer signature.
+        let mut s = Session::new();
+        s.commit(batch_from("proc foo { x } { }\n"));
+        s.commit(batch_from("proc foo { y z } { }\n"));
+        let table = s.signature_table();
+        let sig = table.get("foo").unwrap();
+        let arg_names: Vec<&str> =
+            sig.args.iter().map(|a| a.name.as_str()).collect();
+        assert_eq!(arg_names, vec!["y", "z"]);
+    }
+
+    #[test]
+    fn lookup_proc_returns_latest_batch() {
+        // Two batches both register `foo` in their `procs` map (the
+        // lowerer normally does this, but here we set it manually).
+        // `lookup_proc` returns the entry from the most recent
+        // batch.
+        let mut a = batch_from("proc foo { x } { }\n");
+        let mut b = batch_from("proc foo { y } { }\n");
+        a.procs.insert(
+            "foo".into(),
+            ProcLocation {
+                file: None,
+                body_start_line: 10,
+                body_lines: vec!["from-a".into()],
+            },
+        );
+        b.procs.insert(
+            "foo".into(),
+            ProcLocation {
+                file: None,
+                body_start_line: 20,
+                body_lines: vec!["from-b".into()],
+            },
+        );
+        let mut s = Session::new();
+        s.commit(a);
+        s.commit(b);
+        let got = s.lookup_proc("foo").unwrap();
+        assert_eq!(got.body_start_line, 20);
+        assert_eq!(got.body_lines[0], "from-b");
+        assert!(s.lookup_proc("missing").is_none());
+    }
+}
diff --git a/vw-repl/src/trace.rs b/vw-repl/src/trace.rs
new file mode 100644
index 0000000..0340c42
--- /dev/null
+++ b/vw-repl/src/trace.rs
@@ -0,0 +1,163 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! Stack-frame rewriting for Vivado error / warning messages.
+//!
+//! Vivado reports errors with frames like
+//! ` at SCRATCH:14 in ::configure_cips`
+//!
+//! where `SCRATCH` stands for the scratch path the lowerer ships and
+//! the line number is body-relative inside the proc. This module maps
+//! those back to the original htcl source:
+//! ` at ip/cips.htcl:69 in ::configure_cips`
+//!
+//! Both the REPL (driven by a multi-batch [`crate::session::Session`])
+//! and the `vw run` CLI driver (single batch) feed messages through
+//! the same `resolve_stack_frames_with` machinery — they differ only
+//! in how they answer the "where does proc P live?" question, supplied
+//! as a closure.
+
+use std::path::Path;
+
+use crate::lower::ProcLocation;
+
+/// One stack-frame line after rewriting. Callers dedupe adjacent
+/// frames that resolve to the same `(proc, line)` because Vivado
+/// often emits two frames per logical site (one for the `proc`'s
+/// `kwargs` wrapper, one for the real body).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RewrittenFrame {
+    /// Proc name as it appeared in the frame (empty for top-level
+    /// frames that carry no ` in ::proc` suffix).
+    pub proc: String,
+    /// Line number the frame resolved to.
+    pub line: u32,
+    /// The fully rendered replacement line.
+    pub formatted: String,
+}
+
+/// Walk a message line-by-line, rewriting any stack-frame lines
+/// (`at LOCATION:N in ::proc`) using `lookup`. Lines that don't match
+/// the stack-frame grammar (regular message prose) pass through
+/// unchanged. Adjacent frames that resolve to the same
+/// `(proc, line)` are collapsed — the Vivado kwargs-wrapper +
+/// body-call doubling becomes a single rendered frame.
+///
+/// * `msg` — the raw message; it is split with [`str::lines`], so
+///   output lines are always joined with a single `\n`.
+/// * `lookup` — answers "where does proc P live?"; called with the
+///   proc name stripped of its leading `::`.
+/// * `input_file` — path used for frames that carry no file of their
+///   own.
+pub fn resolve_stack_frames_with<F>(
+    msg: &str,
+    lookup: F,
+    input_file: Option<&Path>,
+) -> String
+where
+    F: Fn(&str) -> Option<ProcLocation>,
+{
+    let mut out = String::with_capacity(msg.len());
+    let mut last_resolved_key: Option<(String, u32)> = None;
+    for (i, line) in msg.lines().enumerate() {
+        if i > 0 {
+            out.push('\n');
+        }
+        let Some(rewritten) = rewrite_stack_line(line, &lookup, input_file)
+        else {
+            // Ordinary prose: emit verbatim and break any dedup run.
+            out.push_str(line);
+            last_resolved_key = None;
+            continue;
+        };
+        // Destructure so the proc name moves into the dedup key
+        // instead of being cloned for every frame.
+        let RewrittenFrame {
+            proc,
+            line: resolved_line,
+            formatted,
+        } = rewritten;
+        let key = (proc, resolved_line);
+        if last_resolved_key.as_ref() == Some(&key) {
+            // Duplicate of the frame just emitted: take back the
+            // separator pushed at the top of this iteration.
+            if out.ends_with('\n') {
+                out.pop();
+            }
+            continue;
+        }
+        last_resolved_key = Some(key);
+        out.push_str(&formatted);
+    }
+    out
+}
+
+/// Parse a single stack-frame line (e.g. one ending in
+/// `:14 in ::configure_cips`) and rewrite it to point at the user's
+/// actual htcl source.
+/// Returns `None` when the line isn't a stack frame (regular
+/// message text) or when the proc isn't one we know about (Vivado
+/// builtins, dynamic procs, etc.) — caller passes such lines
+/// through unchanged.
+pub fn rewrite_stack_line<F>(
+    line: &str,
+    lookup: F,
+    input_file: Option<&Path>,
+) -> Option<RewrittenFrame>
+where
+    F: Fn(&str) -> Option<ProcLocation>,
+{
+    // Grammar emitted by `vw::format_frame` (SCRATCH / FILE are
+    // placeholders for the location part):
+    //   " at SCRATCH:N in ::procname"  ← lookup ProcLocation by name
+    //   " at FILE:N in ::procname"     ← already absolute
+    //   " at SCRATCH:N"                ← anonymous eval / top-level
+    //   " at FILE"                     ← location-less
+    let rest = line.strip_prefix(" at ")?;
+    let (loc_str, proc_part) = match rest.split_once(" in ") {
+        Some((l, p)) => (l, Some(p.trim().to_string())),
+        None => (rest, None),
+    };
+    // Location-less frames have no `:N` suffix and fall out here.
+    let (file_part, line_part) = loc_str.rsplit_once(':')?;
+    let body_line: u32 = line_part.parse().ok()?;
+
+    // NOTE(review): the scratch-path marker that `file_part` is
+    // compared against reads as an empty string in this copy of the
+    // file — confirm it matches the marker `vw::format_frame` emits.
+
+    // Top-level scratch frame (no proc).
+    let Some(proc) = proc_part else {
+        if file_part != "" {
+            return None;
+        }
+        let path = input_file?;
+        return Some(RewrittenFrame {
+            proc: String::new(),
+            line: body_line,
+            formatted: format!(" at {}:{body_line}", display_path(path)),
+        });
+    };
+
+    // Already-absolute frames don't need rewriting; pass through
+    // (dedup downstream still benefits from parsed proc+line).
+    if file_part != "" {
+        return Some(RewrittenFrame {
+            proc,
+            line: body_line,
+            formatted: line.to_string(),
+        });
+    }
+    // Scratch frame inside a proc — Tcl reports "line N of the proc
+    // body." Resolve through the lookup. Tcl always reports
+    // fully-qualified names (leading `::`); the proc table
+    // indexes them without (see `lower::qualify`), so strip
+    // before lookup.
+    let lookup_name = proc.strip_prefix("::").unwrap_or(&proc);
+    let loc = lookup(lookup_name)?;
+    let (abs_line, _content) = loc.resolve_body_line(body_line)?;
+    // Prefer the proc's own file, then the driver's input file.
+    let path_str = loc
+        .file
+        .as_deref()
+        .or(input_file)
+        .map(display_path)
+        .unwrap_or_else(|| "".to_string());
+    // Render first so `proc` can be moved into the frame afterwards
+    // rather than cloned.
+    let formatted = format!(" at {path_str}:{abs_line} in {proc}");
+    Some(RewrittenFrame {
+        proc,
+        line: abs_line,
+        formatted,
+    })
+}
+
+/// Pretty-print a file path for diagnostics: prefer the cwd-
+/// relative form (`ip/cips.htcl`) when the path is under the
+/// current working directory, then home-relative (`~/src/…`),
+/// then the absolute form. Matches the REPL's scrollback so
+/// vw run + vw repl render the same way.
+///
+/// A path equal to the working directory renders as `.` rather than
+/// an empty string. `HOME` is read as an OS string (it need not be
+/// valid UTF-8) and ignored when empty.
+pub fn display_path(path: &Path) -> String {
+    if let Ok(cwd) = std::env::current_dir() {
+        if let Ok(rel) = path.strip_prefix(&cwd) {
+            if rel.as_os_str().is_empty() {
+                return ".".to_string();
+            }
+            return rel.display().to_string();
+        }
+    }
+    // An empty HOME is a prefix of every path as far as
+    // `strip_prefix` is concerned, which would render absolute
+    // paths as `~//abs/path`; treat it as unset.
+    let home = std::env::var_os("HOME").filter(|h| !h.is_empty());
+    if let Some(home) = home {
+        if let Ok(rel) = path.strip_prefix(Path::new(&home)) {
+            return format!("~/{}", rel.display());
+        }
+    }
+    path.display().to_string()
+}
diff --git a/vw-repl/src/ui.rs b/vw-repl/src/ui.rs
new file mode 100644
index 0000000..48e4401
--- /dev/null
+++ b/vw-repl/src/ui.rs
@@ -0,0 +1,306 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! ratatui rendering for the REPL.
+//!
+//! Layout (top-to-bottom):
+//!
+//! ```text
+//! ┌──────────────────────────────────────────┐
+//! │ scrollback                              ▲│
+//! │ (eval log, oldest top, newest bottom)   ║│
+//! │                                         ▼│
+//! ├──────────────────────────────────────────┤
+//! │ input (multi-line, tui-textarea owned)   │
+//! ├──────────────────────────────────────────┤
+//! │ status: vivado state | hints             │
+//! └──────────────────────────────────────────┘
+//! ```
+//!
+//!
When Ctrl-R is active, a centered overlay replaces the input area +//! with the search query and the matching history entry. + +use ratatui::layout::{Constraint, Direction, Layout, Rect}; +use ratatui::style::{Color, Modifier, Style}; +use ratatui::text::{Line, Span}; +use ratatui::widgets::{Block, Borders, Clear, Paragraph, Wrap}; +use ratatui::Frame; +use tui_textarea::TextArea; + +use crate::app::{App, ReverseSearch}; + +pub fn draw(f: &mut Frame, app: &mut App) { + let layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Min(1), // scrollback (fills) + Constraint::Length(input_height(app)), // input + Constraint::Length(1), // status bar + ]) + .split(f.area()); + + draw_scrollback(f, layout[0], app); + draw_input(f, layout[1], app); + draw_status(f, layout[2], app); + + if let Some(rs) = app.reverse_search() { + draw_reverse_search(f, layout[1], rs); + } +} + +fn input_height(app: &App) -> u16 { + // Start at a 5-line minimum so the user has room to draft a + // multi-statement entry without the input box flickering taller + // mid-typing. Grows past 5 with the buffer, capped at 12 so + // very long entries don't squeeze the scrollback out. + let lines = app.input_line_count().clamp(5, 12) as u16; + lines + 2 // +2 for the top/bottom block border +} + +fn draw_scrollback(f: &mut Frame, area: Rect, app: &mut App) { + // Hand the area back to App so mouse-event handlers can + // translate screen coords into scrollback rows. + app.set_scrollback_area(area); + if area.width == 0 || area.height == 0 { + return; + } + + // Pass 1: cheap per-entry wrapped-row count, no allocations. + // O(text length) for each, vs. the old approach which built + // and wrapped every entry per draw — turning a single huge + // entry into multi-MB of per-char allocation on every wheel + // tick. With this pass, total work per draw is O(scrollback) + // for counting + O(viewport) for actually wrapping the + // visible window. 
+ let counts: Vec = app + .scrollback() + .iter() + .map(|e| crate::render::count_wrapped_rows(e, area.width)) + .collect(); + let total: u32 = counts.iter().fold(0u32, |a, b| a.saturating_add(*b)); + + let max_scroll = total.saturating_sub(area.height as u32); + let scroll_offset = if app.scrollback_follow() { + max_scroll + } else { + u32::from(app.scrollback_scroll()).min(max_scroll) + }; + app.set_last_rendered_scroll(scroll_offset.min(u32::from(u16::MAX)) as u16); + + // Pass 2: walk entries; build wrapped lines only for those + // intersecting the viewport. Entries entirely above viewport + // are skipped (their row count contributes to the offset we + // pass to ratatui's `Paragraph::scroll`). Entries entirely + // below viewport stop the walk. + let viewport_start = scroll_offset; + let viewport_end = viewport_start.saturating_add(area.height as u32); + + let mut visible: Vec> = + Vec::with_capacity(area.height as usize + 16); + let mut accumulated: u32 = 0; + let mut skipped_rows: u32 = 0; + { + let scrollback = app.scrollback(); + for (entry, &count) in scrollback.iter().zip(counts.iter()) { + let entry_end = accumulated.saturating_add(count); + if entry_end <= viewport_start { + // Entirely above viewport — count its rows toward + // the local scroll offset and move on without + // wrapping. + skipped_rows = entry_end; + accumulated = entry_end; + continue; + } + if accumulated >= viewport_end { + break; + } + let lines = crate::render::entry_lines(entry, area.width); + let wrapped = crate::render::wrap_lines(lines, area.width); + visible.extend(wrapped); + accumulated = entry_end; + } + } + + // Selection highlight: coords are global wrapped-row indices. + // Subtract `skipped_rows` so they index into the local + // `visible` Vec instead. 
+ if let Some(sel) = app.selection() { + let (start, end) = sel.ordered(); + let skipped = skipped_rows as usize; + let local_start = (start.0.saturating_sub(skipped), start.1); + let local_end = (end.0.saturating_sub(skipped), end.1); + crate::render::apply_selection_highlight( + &mut visible, + local_start, + local_end, + ); + } + + // We've already skipped entries above viewport; ratatui only + // needs to skip the remaining rows within the first visible + // entry (i.e. the offset from where that entry started to + // where the viewport actually begins). + let local_scroll = viewport_start + .saturating_sub(skipped_rows) + .min(u32::from(u16::MAX)) as u16; + + // No surrounding block: the scrollback's main job is to be + // copy-pastable. A box-drawing border around each visible row + // means any selection that spans full lines pulls in `│` chars + // at the start and end of every line. The input box below the + // scrollback still has its own border, which provides enough + // visual separation between the two regions. + let paragraph = Paragraph::new(visible).scroll((local_scroll, 0)); + f.render_widget(paragraph, area); +} + +fn draw_input(f: &mut Frame, area: Rect, app: &mut App) { + // tui-textarea renders itself with its current cursor; the + // surrounding block provides a visual frame. + let block = Block::default() + .borders(Borders::ALL) + .title(input_title(app)); + let ta: &mut TextArea<'static> = app.input_mut(); + ta.set_block(block); + f.render_widget(&*ta, area); +} + +fn input_title(app: &App) -> String { + if app.eval_in_flight() { + " input — vivado: running ".to_string() + } else if app.input_is_complete() { + " input — Enter to run ".to_string() + } else { + " input — Enter for newline (parse incomplete) ".to_string() + } +} + +fn draw_status(f: &mut Frame, area: Rect, app: &App) { + let (label, bg) = match app.worker_state() { + // Indigo when Vivado is sitting idle, ready for input — + // the "you can interact" steady state. 
+ WorkerStatusView::Ready => (" vivado: ready ", Color::Rgb(75, 0, 130)), + // Orange whenever Vivado is anything but ready — starting + // up, mid-eval, or dead. Catches the eye so the user + // notices they can't (yet, or any longer) drive the + // session. + WorkerStatusView::Starting => { + (" vivado: starting ", Color::Rgb(255, 140, 0)) + } + WorkerStatusView::Running => { + (" vivado: running ", Color::Rgb(255, 140, 0)) + } + WorkerStatusView::Down => (" vivado: down ", Color::Rgb(255, 140, 0)), + }; + let hint = if app.reverse_search().is_some() { + "Esc cancel · Enter accept · Ctrl-R older".to_string() + } else { + let mouse = if app.mouse_capture() { + "F2 terminal-sel" + } else { + "F2 mouse-on" + }; + format!( + "Ctrl-D exit · Ctrl-P/N history · Ctrl-R search · \ + Ctrl-K/J or wheel scroll · \ + drag to copy · {mouse} · :load · :quit" + ) + }; + // Split the status bar into [hint (left, fills) | status + // indicator (right, fixed width)] so the status badge always + // anchors to the bottom-right corner. 
+ let badge_width = label.chars().count() as u16; + let layout = Layout::default() + .direction(Direction::Horizontal) + .constraints([Constraint::Min(1), Constraint::Length(badge_width)]) + .split(area); + f.render_widget( + Paragraph::new(Span::styled( + hint, + Style::default().fg(Color::DarkGray), + )), + layout[0], + ); + f.render_widget( + Paragraph::new(Span::styled( + label, + Style::default() + .bg(bg) + .fg(Color::White) + .add_modifier(Modifier::BOLD), + )), + layout[1], + ); +} + +fn draw_reverse_search(f: &mut Frame, anchor: Rect, rs: &ReverseSearch) { + let area = centered_rect(80, 5, f.area(), anchor); + f.render_widget(Clear, area); + let title = format!( + " reverse-i-search ({}) ", + if rs.match_index.is_some() { + "match" + } else if rs.query.is_empty() { + "type to search" + } else { + "no match" + } + ); + let body = vec![ + Line::from(vec![ + Span::styled("query: ", Style::default().fg(Color::DarkGray)), + Span::styled( + rs.query.clone(), + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + ), + ]), + Line::from(Span::raw("")), + Line::from(vec![ + Span::styled("match: ", Style::default().fg(Color::DarkGray)), + Span::raw(rs.match_text.clone()), + ]), + ]; + let para = Paragraph::new(body) + .block( + Block::default() + .borders(Borders::ALL) + .title(title) + .style(Style::default().bg(Color::Black)), + ) + .wrap(Wrap { trim: false }); + f.render_widget(para, area); +} + +/// Compute a centered rectangle for a popup. `anchor` is the area the +/// popup is logically attached to (the input area); we expand around +/// the screen center but never overflow the parent. 
+///
+/// * `percent_x` — popup width as a percentage of `full.width`;
+///   values above 100 are treated as 100.
+/// * `height_lines` — popup height in rows, capped at `full.height`.
+/// * `full` — the parent area; the popup is centered inside it and
+///   positioned relative to its origin.
+/// * `_anchor` — currently unused.
+fn centered_rect(
+    percent_x: u16,
+    height_lines: u16,
+    full: Rect,
+    _anchor: Rect,
+) -> Rect {
+    // Widen before multiplying: in u16 `width * percent` saturates
+    // once the parent is wider than 819 columns, which would shrink
+    // the popup instead of scaling it.
+    let percent = u32::from(percent_x.min(100));
+    let scaled = u32::from(full.width) * percent / 100;
+    // `scaled <= full.width` because `percent <= 100`, so the
+    // conversion cannot fail; fall back to the parent width anyway.
+    let popup_w = u16::try_from(scaled).unwrap_or(full.width);
+    let popup_h = height_lines.min(full.height);
+    // Offsets are relative to the parent's origin so a parent that
+    // does not start at (0, 0) still gets a centered popup.
+    let x = full.x.saturating_add((full.width - popup_w) / 2);
+    let y = full.y.saturating_add((full.height - popup_h) / 2);
+    Rect {
+        x,
+        y,
+        width: popup_w,
+        height: popup_h,
+    }
+}
+
+/// Worker status as the UI sees it. Lives here (not in `app`) so the
+/// renderer doesn't have to know about the worker's internal state
+/// machine.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum WorkerStatusView {
+    Starting,
+    Ready,
+    Running,
+    Down,
+}
diff --git a/vw-vivado/Cargo.toml b/vw-vivado/Cargo.toml
new file mode 100644
index 0000000..8ca0916
--- /dev/null
+++ b/vw-vivado/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "vw-vivado"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+description = "Vivado EDA backend: spawns a long-lived Vivado worker and drives it via the vw-eda wire protocol"
+
+[dependencies]
+vw-eda = { path = "../vw-eda" }
+serde.workspace = true
+serde_json.workspace = true
+thiserror.workspace = true
+tokio.workspace = true
+async-trait.workspace = true
+tempfile.workspace = true
+tracing.workspace = true
+portable-pty.workspace = true
diff --git a/vw-vivado/build.rs b/vw-vivado/build.rs
new file mode 100644
index 0000000..f4c7f43
--- /dev/null
+++ b/vw-vivado/build.rs
@@ -0,0 +1,10 @@
+// Cargo doesn't track files included via `include_str!` for
+// rebuild purposes — it only knows about `.rs` source files.
+// The Vivado shim is `include_str!`'d into `worker.rs` and
+// baked into the binary at compile time; without this build
+// script edits to the shim go unnoticed until something else
+// triggers a recompile of `vw-vivado`, leaving the deployed
+// shim out of sync with the source.
+fn main() {
+    println!("cargo:rerun-if-changed=shim/vivado-shim.tcl");
+}
diff --git a/vw-vivado/shim/vivado-shim.tcl b/vw-vivado/shim/vivado-shim.tcl
new file mode 100644
index 0000000..c516ed5
--- /dev/null
+++ b/vw-vivado/shim/vivado-shim.tcl
@@ -0,0 +1,859 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# vw <-> Vivado wire protocol shim.
+#
+# Sourced by `vivado -mode tcl -source vivado-shim.tcl`. The shim
+# connects back to vw over a loopback TCP socket on the port given in
+# `$env(VW_PROTOCOL_ADDR)` and then reads newline-delimited JSON
+# requests on that socket, writing responses on the same socket.
+#
+# Why a socket and not stdin/stdout: user TCL is free to call `puts`,
+# Vivado prints its own banners and source-echo, and mixing all of
+# that with the wire protocol on one stream forces either text
+# markers (which a hostile or unlucky `puts` could spoof) or fragile
+# OS-specific channels like FIFOs and chardevs. A loopback TCP socket
+# works identically on Linux, macOS, and Windows, can't be polluted
+# by anything the user writes to stdout, and frees vw to treat
+# Vivado's stdout however it wants (forward for `vw run`, capture for
+# the REPL/LSP).
+
+package require json
+
+namespace eval ::vw {
+    variable protocol_sock {}
+    # The eval id currently being processed. `puts` writes that would
+    # go to stdout while `capturing` is set are forwarded immediately
+    # as `{"id":N,"stream":"stdout","data":...}` notifications tagged
+    # with this id, so vw can stream output as it's produced rather
+    # than waiting for the final response. Required for any
+    # long-running command (synth_design, route_design, ...).
+    variable current_eval_id 0
+    variable capturing 0
+    # Reentrancy guard for the send_msg_id override. Without this, a
+    # message emitted *during* our own stack-walking or JSON encoding
+    # would recurse back into the override and either deadlock or
+    # double-emit. We just fall back to the original handler when
+    # already inside our wrapper.
+    variable in_send_msg_id 0
+    # Cap on stack frames captured per message. Vivado's internal
+    # call chains can be 50+ frames deep through tclapp loaders;
+    # rendering all of them would drown the actual message. The
+    # cap is per-message, not per-session, so a future deeper trace
+    # still gets its first N frames.
+    variable stack_frame_cap 20
+}
+
+# ---------- puts capture ----------
+#
+# Rename the real `puts` so we can install a wrapper that, while
+# capturing, forwards each stdout write to vw as a streaming
+# notification. Anything that targets a specific channel (stderr,
+# the protocol socket, a file) passes through unchanged. Outside of
+# eval (`capturing == 0`), stdout writes also pass through — Vivado's
+# own messages between commands stay on the process's stdout where vw
+# handles them per its `--verbose` setting.
+
+rename puts ::vw::real_puts
+
+# Replacement for the built-in `puts`. Recognises the two stdout
+# forms (`puts ?-nonewline? string` and `puts ?-nonewline? stdout
+# string`) while `::vw::capturing` is set and streams them to vw;
+# every other call shape is handed to the renamed original untouched.
+proc puts {args} {
+    set len [llength $args]
+    set start 0
+    set nonewline 0
+    if {$len > 0 && [lindex $args 0] eq "-nonewline"} {
+        set nonewline 1
+        set start 1
+    }
+    # Number of arguments left once a leading `-nonewline` is consumed.
+    set remaining [expr {$len - $start}]
+    if {$::vw::capturing} {
+        if {$remaining == 1} {
+            # `puts ?-nonewline? string` — implicit stdout
+            set str [lindex $args $start]
+            if {!$nonewline} { append str "\n" }
+            # Catch-wrap because attach_stack_if_message is
+            # defined later in this script. During shim sourcing
+            # there's a tiny window where puts exists (this proc)
+            # but the helper doesn't yet; we'd rather pass the
+            # raw string through than crash the puts itself.
+            catch {set str [::vw::attach_stack_if_message $str 2]}
+            ::vw::stream_stdout $::vw::current_eval_id $str
+            return
+        } elseif {$remaining == 2 \
+                && [lindex $args $start] eq "stdout"} {
+            set str [lindex $args [expr {$start + 1}]]
+            if {!$nonewline} { append str "\n" }
+            catch {set str [::vw::attach_stack_if_message $str 2]}
+            ::vw::stream_stdout $::vw::current_eval_id $str
+            return
+        }
+    }
+    # Fall through to the real puts for: any non-stdout channel, or
+    # any stdout write when not capturing.
+    ::vw::real_puts {*}$args
+}
+
+# ---------- JSON helpers ----------
+
+# Hand-encode a string per RFC 8259. Vivado's bundled Tcllib doesn't
+# include `json::write`, so we provide the minimum we need.
+#
+# Implementation: `string map` does the bulk-substitution in one
+# native Tcl C call, vs. a per-char Tcl loop (which is what we
+# used to do). The difference is dramatic at scale — a 1MB puts
+# output (the kind `puts [util::props -object $cpm5]` produces)
+# went from minutes of per-char `string index`/`scan`/`switch`
+# iteration to ~100ms via `string map`. Rare control chars
+# (codepoints < 0x20 other than the named whitespace escapes)
+# trigger a slow per-char fallback; in practice Vivado property
+# values don't contain them, so the fast path covers everything.
+proc ::vw::json_string {value} {
+    # `string map` walks the input once and never rescans text it
+    # has already substituted, so the backslashes introduced by the
+    # other escapes cannot be re-escaped regardless of key order.
+    # (Key order only matters when keys overlap; these don't.)
+    set escaped [string map [list \
+        "\\" "\\\\" \
+        "\"" "\\\"" \
+        "\b" "\\b" \
+        "\f" "\\f" \
+        "\n" "\\n" \
+        "\r" "\\r" \
+        "\t" "\\t"] $value]
+    # Fast path: no remaining control chars → just wrap in quotes.
+    if {![regexp {[\x00-\x08\x0B\x0E-\x1F]} $escaped]} {
+        return "\"$escaped\""
+    }
+    # Slow path: per-char loop for the remaining control chars.
+    # Only hit when the string contains rare control codepoints
+    # — Vivado property values shouldn't, but a user `puts` of
+    # binary-ish data might.
+    set out "\""
+    set len [string length $escaped]
+    for {set i 0} {$i < $len} {incr i} {
+        set ch [string index $escaped $i]
+        scan $ch %c codepoint
+        if {$codepoint < 0x20} {
+            append out [format "\\u%04x" $codepoint]
+        } else {
+            append out $ch
+        }
+    }
+    append out "\""
+    return $out
+}
+
+# ---------- response helpers ----------
+
+# Send a streaming stdout chunk during eval. These notifications are
+# distinguishable from the final response by the presence of a
+# `stream` field (and absence of `ok`).
+proc ::vw::stream_stdout {id data} {
+    variable protocol_sock
+    set j [::vw::json_string $data]
+    ::vw::real_puts $protocol_sock \
+        "{\"id\":$id,\"stream\":\"stdout\",\"data\":$j}"
+    flush $protocol_sock
+}
+
+# Send the final success response for request `id`; `result` is
+# encoded as a JSON string.
+proc ::vw::send_ok {id result} {
+    variable protocol_sock
+    set j_result [::vw::json_string $result]
+    # Use real_puts explicitly so the wrapper above can never
+    # accidentally divert protocol traffic into a stream notification.
+    ::vw::real_puts $protocol_sock \
+        "{\"id\":$id,\"ok\":true,\"result\":$j_result}"
+    flush $protocol_sock
+}
+
+# Send the final failure response for request `id`. `code` and `info`
+# are included in the error object only when non-empty.
+proc ::vw::send_err {id message {code ""} {info ""}} {
+    variable protocol_sock
+    set j_msg [::vw::json_string $message]
+    set fields "\"message\":$j_msg"
+    if {$code ne ""} {
+        append fields ",\"code\":[::vw::json_string $code]"
+    }
+    if {$info ne ""} {
+        append fields ",\"info\":[::vw::json_string $info]"
+    }
+    ::vw::real_puts $protocol_sock \
+        "{\"id\":$id,\"ok\":false,\"error\":{$fields}}"
+    flush $protocol_sock
+}
+
+# Write a `[vw-shim]`-prefixed diagnostic line to stderr.
+proc ::vw::log {msg} {
+    puts stderr "\[vw-shim\] $msg"
+    flush stderr
+}
+
+# ---------- kwargs runtime ----------
+#
+# Wrapper procs lowered from htcl declare themselves as
+# `proc NAME {args} { ::vw::kwargs $args {param default ...} ; BODY }`.
+# This helper parses `args` against `sig` (a dict of `param default
+# param default ...`) and uses `upvar 1` to set each parameter as
+# a local in the caller's frame. After this returns, the wrapper
+# body sees `$dir`, `$cell`, `$name`, etc. just as if they were
+# standard Tcl parameters with defaults.
+#
+# Why this exists: htcl is keyword-only at the call site, but Tcl
+# proc dispatch is positional. Without this runtime parsing, the
+# only way to make `wrap -name x` work would be to rewrite the
+# call site to positional form at compile time — which our lowerer
+# used to do, but only for top-level calls, not for calls inside
+# proc bodies / namespace eval / [ ... ]. Moving the keyword parse
+# to runtime makes every call site work uniformly.
+#
+# Arg shapes supported:
+#   `-flag value`      — value-bearing flag, sets $flag = value
+#   `-flag`            — bare boolean flag (end of args), sets $flag = 1
+#   `-flag -other ...` — bare boolean flag (next token is another
+#                        known flag), sets $flag = 1, continues
+#
+# The bare-flag heuristic matches Vivado's calling convention:
+# their APIs and internal Tcl use `-quiet`/`-verbose`/etc. as bare
+# booleans. The "is next token a known flag?" disambiguator avoids
+# eating a legitimate value that happens to start with `-` (e.g.
+# `-filter -name` where the user intended `-filter` to take `-name`
+# as its value — but a leading `-` value is exotic enough that we
+# accept the ambiguity).
+proc ::vw::kwargs {argv sig} {
+    # Initialize each parameter to its declared default. Also
+    # initialize a `__vw_kw_NAME_set` flag to 0 — wrappers can
+    # check this to distinguish "user supplied this arg" from
+    # "we filled in the default", which matters for
+    # set_property -dict where setting unsupplied properties
+    # (with their defaults) re-validates the whole cell and
+    # rejects values Vivado considers out of range for the
+    # cell's current state.
+    foreach {name default} $sig {
+        upvar 1 $name var
+        set var $default
+        upvar 1 __vw_kw_${name}_set seen
+        set seen 0
+    }
+    set n [llength $argv]
+    set i 0
+    while {$i < $n} {
+        set flag [lindex $argv $i]
+        if {![string match -* $flag]} {
+            error "kwargs: expected -flag, got '$flag'"
+        }
+        set key [string range $flag 1 end]
+        if {![dict exists $sig $key]} {
+            set allowed [join [dict keys $sig] ", "]
+            error "kwargs: unknown flag '$flag'; allowed: $allowed"
+        }
+        # Decide whether the current flag is bare or takes a value.
+        # Bare iff: at end of args, OR next token is another known
+        # -flag.
+        set bare 1
+        set next_i [expr {$i + 1}]
+        if {$next_i < $n} {
+            set peek [lindex $argv $next_i]
+            if {![string match -* $peek]} {
+                set bare 0
+            } else {
+                set peek_key [string range $peek 1 end]
+                if {![dict exists $sig $peek_key]} {
+                    # Peek looks like a flag but isn't ours — assume
+                    # it's a value for the current flag (e.g. a CLI
+                    # path or arg starting with `-`).
+                    set bare 0
+                }
+            }
+        }
+        # Re-point the two link variables at this flag's caller-side
+        # parameter and its "was supplied" marker.
+        upvar 1 $key var
+        upvar 1 __vw_kw_${key}_set seen
+        set seen 1
+        if {$bare} {
+            set var 1
+            incr i
+        } else {
+            set var [lindex $argv $next_i]
+            incr i 2
+        }
+    }
+}
+
+# ---------- bulk property fetch ----------
+#
+# `::vw::props_dict OBJ` returns a paired Tcl list (NAME VAL
+# NAME VAL …) of every property on `OBJ`. The point is a
+# single Vivado RPC instead of N: htcl wrappers that want the
+# full property bag (e.g. `util::props`) would otherwise issue
+# one `extern::get_property` per property × hundreds of
+# properties on an IP cell. The PTY round-trip is the dominant
+# cost; doing the iteration entirely Vivado-side cuts it to
+# constant per call.
+proc ::vw::props_dict {obj} {
+    set out [list]
+    foreach name [list_property $obj] {
+        lappend out $name [get_property $name $obj]
+    }
+    return $out
+}
+
+# ---------- user-set property tracking ----------
+#
+# Vivado offers no per-property "is this at the default?"
+# API accessible from Tcl (`get_property`, `list_property`,
+# `report_property -all`, `bd::get_properties` all return every
+# property's current value with no user-vs-default distinction).
+# The only system-of-record is `write_bd_tcl`'s output, which
+# requires a full BD serialization round-trip per query.
+#
+# Instead we keep our own tally: every time
+# `vivado_cmd::set_property` is invoked (the htcl-level chokepoint
+# all wrappers and user code go through), it records the
+# (object, name, value) triples here. `::vw::user_props_dict` /
+# `::vw::user_props_nested` read back from this side-channel —
+# returning ONLY the properties the user / wrapper explicitly
+# pushed, never the ones Vivado cascaded as derived defaults.
+#
+# Cost: O(properties set) for record (dict insertion), O(props
+# returned) for retrieval (dict walk). No file I/O, no full-
+# design serialization. Persists across batches in the worker
+# until `:restart`.
+#
+# Caveat: tracks only properties set via the
+# `vivado_cmd::set_property` wrapper. Direct `extern::set_property`
+# bypasses the recording. That's by design — the wrappers are
+# the documented boundary, and bypassing them is an explicit
+# opt-out of the tracking machinery.
+
+namespace eval ::vw {
+    variable user_set_props
+}
+
+# Remember that the caller explicitly set properties on `obj`.
+# The trailing arguments form a paired list (name1 val1 name2
+# val2 …), the same shape the wrapper hands to `set_property
+# -dict`. Entries accumulate per object; setting a name again
+# replaces its earlier value. Returns the object's updated tally.
+proc ::vw::record_user_props {obj args} {
+    variable user_set_props
+    # The namespace declares the variable without a value, so the
+    # array has to be materialised on first use.
+    if {![info exists user_set_props]} {
+        array set user_set_props {}
+    }
+    set slot [::vw::_user_props_key $obj]
+    if {![info exists user_set_props($slot)]} {
+        set user_set_props($slot) [dict create]
+    }
+    foreach {prop_name prop_value} $args {
+        dict set user_set_props($slot) $prop_name $prop_value
+    }
+    return $user_set_props($slot)
+}
+
+# Canonical key for the per-object side-channel storage.
+# Vivado's `PATH` property gives a unique BD-cell identifier across
+# the design; falls back to the raw object string when PATH isn't
+# queryable (e.g. for non-BD objects passed through accidentally)
+# or comes back empty — an empty key would otherwise lump every
+# such object into one shared tally.
+proc ::vw::_user_props_key {obj} {
+    if {[catch {get_property PATH $obj} p] || $p eq ""} {
+        return $obj
+    }
+    return $p
+}
+
+# Return the recorded (name, value) paired list for `obj`. Empty
+# list when nothing has been recorded.
+proc ::vw::user_props_dict {obj} {
+    variable user_set_props
+    if {![info exists user_set_props]} { return [list] }
+    set key [::vw::_user_props_key $obj]
+    if {![info exists user_set_props($key)]} { return [list] }
+    set out [list]
+    dict for {k v} $user_set_props($key) {
+        lappend out $k $v
+    }
+    return $out
+}
+
+# Same shape as `::vw::props_nested` but seeded from the recorded
+# user-set property tally instead of from `list_property` +
+# `get_property`. Each value is classified via the structural
+# `_lift_value` helper and inserted by dot-split path so the
+# result is a nested `Properties` with only the explicitly-set
+# sub-keys present.
+proc ::vw::user_props_nested {obj} {
+    set plain [dict create]
+    foreach {name raw} [::vw::user_props_dict $obj] {
+        set leaf [::vw::_lift_value $raw]
+        # Dotted names (A.B.C) become a key path into the nested dict.
+        dict set plain {*}[split $name "."] $leaf
+    }
+    return [::vw::_wrap_nested $plain]
+}
+
+# `::vw::props_nested OBJ` returns the FULL output `util::props`
+# wants — a nested `Properties` dict where dotted property names
+# (CONFIG.X.Y) expand into hierarchy, and each leaf value is
+# already a `[list Scalar VALUE]` or `[list Nested DICT]` tuple.
+#
+# Lives in the shim (plain Tcl) rather than in user-side htcl
+# because:
+# - One Vivado RPC for the entire fetch + classification +
+#   nesting pipeline, vs. one RPC for the fetch + thousands
+#   of htcl-proc kwargs-envelope invocations per recursive
+#   sub-key for the post-processing.
+# - CPM5 has ~200 top-level properties, each whose value is
+#   itself a paired-dict with dozens of sub-keys.
The htcl- +# side post-processing was hitting tens of thousands of +# kwargs invocations × envelope overhead → minutes. Native +# Tcl inside Vivado does the same work in well under a +# second. +# +# The structural classifier (`::vw::_lift_value`) mirrors what +# `lift::lift_recursive` did in user-htcl: pure shape inference, +# no Vivado lookups. The wrap step (`::vw::_wrap_nested`) walks +# the plain nested dict once and tags intermediate levels as +# `Property::Nested(...)`. Leaves already carry their tag from +# `_lift_value`. +proc ::vw::props_nested {obj} { + set plain [dict create] + foreach name [list_property $obj] { + set raw [get_property $name $obj] + set leaf [::vw::_lift_value $raw] + dict set plain {*}[split $name "."] $leaf + } + return [::vw::_wrap_nested $plain] +} + +# Structural inference on a raw property value. Returns a +# `[list Scalar v]` or `[list Nested inner]` tuple. Mirror of +# lift::looks_like_paired_dict + lift::lift_recursive in plain +# Tcl with no kwargs envelope. +proc ::vw::_lift_value {raw} { + if {[catch {llength $raw} n]} { return [list Scalar $raw] } + if {$n == 0 || $n % 2 != 0} { return [list Scalar $raw] } + foreach {k _v} $raw { + if {![regexp {^[A-Za-z_][A-Za-z0-9_.]*$} $k]} { + return [list Scalar $raw] + } + } + set inner [dict create] + foreach {k v} $raw { + dict set inner $k [::vw::_lift_value $v] + } + return [list Nested $inner] +} + +# Walk a plain nested Tcl dict and wrap each intermediate +# level as `[list Nested ]`. A value is a leaf when +# it's a 2-element list whose head is "Scalar" or "Nested" +# (the existing Property tuple shape). Anything else is a +# sub-dict to descend into. 
+proc ::vw::_wrap_nested {plain} {
+    set wrapped [dict create]
+    dict for {key value} $plain {
+        # Leaves are already-tagged 2-element Property tuples; every
+        # other value is a sub-dict that gets wrapped recursively.
+        set is_leaf [expr {[llength $value] == 2
+                           && [lindex $value 0] in {Scalar Nested}}]
+        if {!$is_leaf} {
+            set value [list Nested [::vw::_wrap_nested $value]]
+        }
+        dict set wrapped $key $value
+    }
+    return $wrapped
+}
+
+# ---------- send_msg_id override ----------
+#
+# Why we override: when Vivado emits a WARNING/ERROR/INFO/CRITICAL
+# WARNING via ::common::send_msg_id, the raw line goes to stdout
+# with no call-context — the user sees `WARNING: [Common 17-1496]
+# ...` and has no way to tell which Tcl proc triggered it. Hooking
+# the Tcl entry point lets us capture the call stack at emit time
+# and render it as `at file:line in proc` continuation lines.
+#
+# Tradeoffs to be aware of:
+#   - The original ::common::send_msg_id is NOT called. That means
+#     `set_msg_config -id X -suppress` won't suppress Tcl-emitted
+#     messages (it still works for messages Vivado's C code emits,
+#     which our PTY-level filter handles). Acceptable for v1; we
+#     can replicate suppression here if it becomes a real need.
+#   - Messages emitted from Vivado's C code (synth, route, etc.)
+#     bypass this override and are caught by the PTY-line filter
+#     in the worker, with no stack — that's a fundamental limit.
+
+# True when `str`'s first line looks like a Vivado-standard
+# message: starts (after optional leading whitespace) with
+# ERROR:/WARNING:/CRITICAL WARNING:/INFO:. Used by the puts
+# wrapper to decide whether to attach a stack — we only want
+# traces on message-formatted output, not on every `puts hi`.
+proc ::vw::is_vivado_message {str} {
+    # Only the first line is inspected; leading whitespace is ignored.
+    set head [string trimleft [lindex [split $str "\n"] 0]]
+    foreach prefix {"ERROR:" "CRITICAL WARNING:" "WARNING:" "INFO:"} {
+        if {[string match "${prefix}*" $head]} {
+            return 1
+        }
+    }
+    return 0
+}
+
+# Decorate a Vivado-style message with the current Tcl call stack.
+# When `str` passes `::vw::is_vivado_message`, each captured frame
+# is appended as a `\n at <frame>` continuation line and the result
+# is returned; a trailing newline on `str` stays at the very end.
+# Any other string, or an empty capture, comes back unchanged.
+#
+# `skip_caller_frames` is handed to the stack walk: the number of
+# wrapper layers to step over so the deepest reported frame is the
+# user's code rather than shim plumbing. The puts wrapper passes 2
+# (this helper + the puts wrapper itself).
+proc ::vw::attach_stack_if_message {str skip_caller_frames} {
+    if {![::vw::is_vivado_message $str]} {
+        return $str
+    }
+    set stack [::vw::capture_stack $skip_caller_frames]
+    if {[llength $stack] == 0} {
+        return $str
+    }
+    # Peel off a trailing newline so the frames land before it.
+    set newline_terminated [expr {[string index $str end] eq "\n"}]
+    if {$newline_terminated} {
+        set body [string range $str 0 end-1]
+    } else {
+        set body $str
+    }
+    append body "\n at " [join $stack "\n at "]
+    if {$newline_terminated} {
+        append body "\n"
+    }
+    return $body
+}
+
+# Walk the Tcl call stack starting at the caller of our override
+# (`info frame 1` — skipping our wrapper itself) and build a list
+# of "at file:line in proc" strings, deepest-first. Uses both
+# `info frame` (gives script file/line) and `info level` (gives
+# proc name + args) for each depth; merges whatever's available.
+# Capped at `$::vw::stack_frame_cap` frames so a 50-deep tclapp
+# loader chain doesn't drown the actual message.
+#
+# Returns at least one entry even when nothing is locatable — a
+# `(stack: info-frame-depth=N info-level-depth=M — no locatable
+# frames; …)` placeholder — so the user can distinguish "override
+# didn't fire" from "override fired but Tcl gave us nothing to
+# render."
+proc ::vw::capture_stack {skip_caller_frames} {
+    variable stack_frame_cap
+    set out [list]
+    set depth [info frame]
+    set level_depth [info level]
+    # Skip our own frame plus whatever the caller asked us to skip.
+    set start [expr {1 + $skip_caller_frames}]
+    for {set i $start} {$i <= $depth} {incr i} {
+        if {[llength $out] >= $stack_frame_cap} { break }
+        set frame ""
+        catch {set frame [info frame -$i]}
+        # `info level -k` is indexed independently of `info frame`
+        # — k=0 is the current proc, k=-1 the caller, etc. We map
+        # frame index i to level index k by clamping; mismatches
+        # are common (frames can include non-proc evals) but worth
+        # trying as a fallback.
+        set level_args ""
+        set k [expr {$i - $skip_caller_frames - 1}]
+        if {$k > 0 && $k < $level_depth} {
+            catch {set level_args [info level -$k]}
+        }
+        set entry [::vw::format_frame $frame $level_args]
+        if {$entry ne ""} { lappend out $entry }
+    }
+    if {[llength $out] == 0} {
+        lappend out "(stack: info-frame-depth=$depth\
+            info-level-depth=$level_depth\
+            — no locatable frames; message likely\
+            emitted from byte-compiled or C-bridged Tcl)"
+    }
+    return $out
+}
+
+# Turn one `info frame` dict (and an optional `info level` args
+# list as a fallback proc-name source) into the human-readable
+# string we render. Drops frames that have nothing locatable at
+# all — they're just noise.
+#
+# Returns "file:line in proc", or whichever of location / proc is
+# available; failing both, "(cmd: ...)" with the command cut to at
+# most 80 characters; or "" when the frame should be dropped.
+proc ::vw::format_frame {frame level_args} {
+    # Each key is optional in an `info frame` dict, so every lookup
+    # is guarded and defaults to the empty string.
+    set proc ""
+    catch {set proc [dict get $frame proc]}
+    set file ""
+    catch {set file [dict get $frame file]}
+    set line ""
+    catch {set line [dict get $frame line]}
+    set cmd ""
+    catch {set cmd [dict get $frame cmd]}
+    # `info level -k` returns the proc invocation as `procname
+    # arg1 arg2 ...`; the first element is the proc name.
+    if {$proc eq "" && $level_args ne ""} {
+        set proc [lindex $level_args 0]
+    }
+
+    # Drop frames that are part of our own plumbing — they're
+    # always noise to the user. The signal in a stack trace is
+    # "which line of MY code led to this message"; frames in
+    # the shim file, the ::vw:: namespace, our send_msg_id
+    # override, or the ::log:: helpers are all infrastructure.
+    if {[string match "*vivado-shim.tcl" $file]} {
+        return ""
+    }
+    if {[string match "::vw::*" $proc]} {
+        return ""
+    }
+    if {$proc eq "::common::send_msg_id"} {
+        return ""
+    }
+    if {[string match "::log::*" $proc]} {
+        return ""
+    }
+
+    set location ""
+    if {$file ne "" && $line ne ""} {
+        set location "${file}:${line}"
+    } elseif {$line ne ""} {
+        # `eval` frames without a source file — common for our
+        # `uplevel #0 $tcl` shim entry — still tell the user
+        # "line N of the script you submitted."
+        set location ":${line}"
+    }
+    if {$location ne "" && $proc ne ""} {
+        return "${location} in ${proc}"
+    } elseif {$location ne ""} {
+        return $location
+    } elseif {$proc ne ""} {
+        return $proc
+    } elseif {$cmd ne ""} {
+        # Last-ditch: no proc and no location, but we know what
+        # command this frame was running. Truncate so a very long
+        # command doesn't blow out the trace. Indices 0..79 keep
+        # exactly 80 characters, matching the `> 80` test below, so
+        # the ellipsis only appears when something was really cut.
+        set short [string range $cmd 0 79]
+        if {[string length $cmd] > 80} { append short "..." }
+        return "(cmd: $short)"
+    }
+    return ""
+}
+
+# Severity normalizer. Vivado is inconsistent about case and
+# uses underscores in CRITICAL_WARNING; we normalize to the same
+# uppercase, space-separated form the PTY-line classifier expects
+# so the worker can route warnings/errors to the right StreamKind.
+proc ::vw::normalize_severity {sev} {
+    # Trim and upper-case first so the match below is insensitive
+    # to case and surrounding whitespace. Unrecognized severities
+    # fall through to the default arm and come back in that
+    # trimmed, upper-cased form.
+    set s [string toupper [string trim $sev]]
+    switch -- $s {
+        "CRITICAL_WARNING" -
+        "CRITICAL WARNING" { return "CRITICAL WARNING" }
+        "ERROR" -
+        "FATAL" -
+        "FATAL_ERROR" { return "ERROR" }
+        "WARNING" { return "WARNING" }
+        "INFO" -
+        "STATUS" { return "INFO" }
+        default { return $s }
+    }
+}
+
+# Install our wrapper *after* Vivado has had a chance to define
+# ::common::send_msg_id. If the proc doesn't exist yet (very early
+# init, headless mode without the common namespace), we silently
+# skip — Vivado's PTY emission still works, just without our
+# stack capture.
+#
+# Logs status once per successful install and once per skipped
+# attempt (with the reason), so the user can see in the REPL
+# whether the override is live without enabling --verbose.
+#
+# Idempotent: the presence of ::vw::orig_send_msg_id (the renamed
+# original) is the "already installed" marker.
+proc ::vw::install_send_msg_override {} {
+    if {[info commands ::vw::orig_send_msg_id] ne ""} {
+        # Already installed — silent on the retry path so we don't
+        # spam the log on every eval.
+        return
+    }
+    set candidates [info commands ::common::send_msg*]
+    if {[info commands ::common::send_msg_id] eq ""} {
+        ::vw::log "::common::send_msg_id not present;\
+            ::common::send_msg* = {$candidates};\
+            stack-capture override NOT installed"
+        return
+    }
+    rename ::common::send_msg_id ::vw::orig_send_msg_id
+
+    # The Vivado-Tcl signature is `send_msg_id id severity msg
+    # [optional args]`. We accept the same.
+    proc ::common::send_msg_id {id severity msg args} {
+        # Reentrancy guard — if our stack walk somehow triggers
+        # another send_msg_id, fall back to the original.
+        if {$::vw::in_send_msg_id} {
+            return [uplevel 1 [list ::vw::orig_send_msg_id \
+                $id $severity $msg {*}$args]]
+        }
+        set ::vw::in_send_msg_id 1
+        # `ok` receives catch's return code: 0 means the body ran
+        # to completion, anything else is handled after the guard
+        # flag has been cleared.
+        set ok [catch {
+            set sev_norm [::vw::normalize_severity $severity]
+            # Skip 1 caller frame so the deepest frame in the
+            # rendered stack is the one that called send_msg_id,
+            # not the user proc that called our wrapper.
+            set stack [::vw::capture_stack 1]
+            set out "${sev_norm}: \[${id}\] ${msg}"
+            foreach frame $stack {
+                append out "\n at ${frame}"
+            }
+            if {$::vw::capturing} {
+                ::vw::stream_stdout $::vw::current_eval_id "$out\n"
+            } else {
+                # Outside an eval — fall back to the original so the
+                # message still appears wherever Vivado normally
+                # would have put it.
+                ::vw::orig_send_msg_id $id $severity $msg {*}$args
+            }
+        } err]
+        set ::vw::in_send_msg_id 0
+        if {$ok != 0} {
+            # Our override threw — never let that prevent Vivado from
+            # at least seeing the message. Fall through to original.
+            ::vw::log "send_msg_id override failed: $err"
+            return [uplevel 1 [list ::vw::orig_send_msg_id \
+                $id $severity $msg {*}$args]]
+        }
+    }
+    ::vw::log "installed send_msg_id override"
+}
+
+# Wrap `::set_property` so we can attach a Tcl call stack to the
+# warnings Vivado emits from its C++ property-validation path.
+# Those warnings (notably `[IP_Flow 19-7090] Invalid parameter
+# '…' provided, Ignoring`) bypass `::common::send_msg_id` and
+# write directly through Vivado's internal message bus to the
+# PTY — there's no Tcl frame to grab by the time the bytes arrive
+# at the Rust worker. So we capture the stack here, while the
+# Tcl interpreter is *about* to enter `set_property`'s C++,
+# emit it as a marker the worker recognizes and strips, then
+# the worker tags any warnings that arrive while the marker is
+# active. Markers go via `::vw::real_puts stdout` so they
+# bypass our own `puts` override and land on the PTY directly.
+#
+# Idempotent in the same way as the send_msg_id install: the
+# renamed original, ::vw::orig_set_property_for_ctx, doubles as
+# the "already installed" marker.
+proc ::vw::install_set_property_context {} {
+    if {[info commands ::vw::orig_set_property_for_ctx] ne ""} {
+        return
+    }
+    if {[info commands ::set_property] eq ""} { return }
+    rename ::set_property ::vw::orig_set_property_for_ctx
+    proc ::set_property {args} {
+        # Skip 1 = this wrapper's own frame.
+        set frames [::vw::capture_stack 1]
+        ::vw::emit_pty_ctx_begin $frames
+        # Run the original in the caller's scope, keeping the full
+        # return-options dict so its result or error can be handed
+        # back unchanged once the END marker has been written.
+        set rc [catch {
+            uplevel 1 [list ::vw::orig_set_property_for_ctx {*}$args]
+        } result options]
+        ::vw::emit_pty_ctx_end
+        return -options $options $result
+    }
+    ::vw::log "installed set_property context wrap"
+}
+
+# Push a context marker onto the PTY. Format: a sentinel-prefixed
+# line per frame plus begin/end bookends, so the Rust PTY filter
+# can match line-by-line without needing a base64 decoder.
+# Emits BEGIN, one FRAME line per entry of `frames`, then READY,
+# and flushes stdout.
+proc ::vw::emit_pty_ctx_begin {frames} {
+    ::vw::real_puts stdout "__VW_CTX_BEGIN__"
+    foreach f $frames {
+        ::vw::real_puts stdout "__VW_CTX_FRAME__:$f"
+    }
+    ::vw::real_puts stdout "__VW_CTX_READY__"
+    flush stdout
+}
+
+# Write the END marker that closes the context opened by
+# `::vw::emit_pty_ctx_begin`, and flush stdout.
+proc ::vw::emit_pty_ctx_end {} {
+    ::vw::real_puts stdout "__VW_CTX_END__"
+    flush stdout
+}
+
+# ---------- dispatch ----------
+
+# Handle one protocol request line. The line is parsed as JSON and
+# must carry `id` and `op`; failures before an id is known are
+# reported against id 0. Supported ops:
+#   eval     — run the `tcl` field at global level with output
+#              capture enabled; reply ok with the result, or err
+#              with the message plus -errorcode / -errorinfo.
+#   shutdown — acknowledge, log, and exit the process.
+# Any other op gets an "unknown op" error reply.
+proc ::vw::dispatch {line} {
+    if {[catch {::json::json2dict $line} req]} {
+        ::vw::send_err 0 "protocol parse error: $req"
+        return
+    }
+    if {![dict exists $req id] || ![dict exists $req op]} {
+        ::vw::send_err 0 "missing id or op"
+        return
+    }
+    set id [dict get $req id]
+    set op [dict get $req op]
+    switch -- $op {
+        eval {
+            if {![dict exists $req tcl]} {
+                ::vw::send_err $id "eval request missing tcl field"
+                return
+            }
+            set tcl [dict get $req tcl]
+            set ::vw::current_eval_id $id
+            set ::vw::capturing 1
+            set rc [catch {uplevel #0 $tcl} result opts]
+            set ::vw::capturing 0
+            if {$rc != 0} {
+                set ecode ""
+                set einfo ""
+                catch {set ecode [dict get $opts -errorcode]}
+                catch {set einfo [dict get $opts -errorinfo]}
+                ::vw::send_err $id $result $ecode $einfo
+            } else {
+                ::vw::send_ok $id $result
+            }
+        }
+        shutdown {
+            ::vw::send_ok $id ""
+            ::vw::log "shim shutting down"
+            exit 0
+        }
+        default {
+            ::vw::send_err $id "unknown op: $op"
+        }
+    }
+}
+
+# ---------- main ----------
+
+# The worker passes its listener address as host:port in
+# VW_PROTOCOL_ADDR; without it there is nothing to connect to.
+if {![info exists ::env(VW_PROTOCOL_ADDR)]} {
+    ::vw::log "VW_PROTOCOL_ADDR not set; exiting"
+    exit 1
+}
+
+# Greedy `(.*)` so the port is whatever follows the LAST colon.
+if {![regexp {^(.*):(\d+)$} $::env(VW_PROTOCOL_ADDR) -> ::vw::host ::vw::port]} {
+    ::vw::log "invalid VW_PROTOCOL_ADDR: $::env(VW_PROTOCOL_ADDR)"
+    exit 1
+}
+
+if {[catch {socket $::vw::host $::vw::port} sock]} {
+    ::vw::log "failed to connect to $::vw::host:$::vw::port: $sock"
+    exit 1
+}
+
+set ::vw::protocol_sock $sock
+fconfigure $sock -buffering line -translation lf
+
+::vw::log "connected to $::vw::host:$::vw::port"
+
+# Try installing the send_msg_id override now. If Vivado hasn't
+# defined ::common::send_msg_id yet (unusual but possible in
+# headless / minimal-mode configurations), the override will be
+# re-attempted on the first eval — it's idempotent.
+catch {::vw::install_send_msg_override}
+catch {::vw::install_set_property_context}
+
+# Request loop: one JSON request per line until the socket hits
+# EOF (or a shutdown op exits the process from inside dispatch).
+while {1} {
+    if {[gets $sock line] < 0} {
+        if {[eof $sock]} {
+            ::vw::log "protocol socket closed; exiting"
+            break
+        }
+        continue
+    }
+    set line [string trim $line]
+    if {$line eq ""} { continue }
+    # Retry installs on each eval until they succeed — both procs
+    # bail out cheaply once installed.
+    catch {::vw::install_send_msg_override}
+    catch {::vw::install_set_property_context}
+    ::vw::dispatch $line
+}
+
+close $sock
+exit 0
diff --git a/vw-vivado/src/lib.rs b/vw-vivado/src/lib.rs
new file mode 100644
index 0000000..a8d2af5
--- /dev/null
+++ b/vw-vivado/src/lib.rs
@@ -0,0 +1,15 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! Vivado [`EdaBackend`](vw_eda::EdaBackend) implementation.
+//!
+//! Spawns `vivado -mode tcl` as a long-lived worker, sources the
+//! embedded shim TCL file at startup, and exchanges newline-delimited
+//! JSON with it over stdio. Resolution order for the `vivado`
+//! executable is: `VW_VIVADO` env var, then `PATH` lookup. v0 supports
+//! the `eval` op only; structured ops land in phase 4.
+ +mod worker; + +pub use worker::{StreamKind, VivadoBackend, VivadoConfig}; diff --git a/vw-vivado/src/worker.rs b/vw-vivado/src/worker.rs new file mode 100644 index 0000000..083688c --- /dev/null +++ b/vw-vivado/src/worker.rs @@ -0,0 +1,1319 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! Vivado worker process: spawn under a PTY, accept the shim's +//! loopback connection, drive the request/response loop. +//! +//! Vivado is spawned with its stdin/stdout/stderr attached to a +//! pseudo-terminal slave (via [`portable_pty`]). The PTY matters: +//! when stdout is a pipe, glibc puts it in full-block-buffering mode +//! and Vivado's banner / source-echo / info messages don't appear +//! until ~4 KB accumulates, which kills the `--verbose` UX. With a +//! PTY Vivado sees a TTY on stdout and switches to line buffering, +//! so output streams as it's produced. `portable_pty` works on Linux +//! and macOS via Unix PTYs, and on Windows via ConPTY. + +use std::fs::File; +use std::io::{BufWriter, Read, Write}; +use std::path::PathBuf; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; + +use async_trait::async_trait; +use portable_pty::{ + native_pty_system, Child as PtyChild, CommandBuilder, MasterPty, PtySize, +}; +use tempfile::TempDir; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::net::tcp::{OwnedReadHalf, OwnedWriteHalf}; +use tokio::net::TcpListener; +use tracing::{debug, warn}; +use vw_eda::{ + BackendError, EdaBackend, EvalOutput, Request, RequestOp, Response, + ResponseResult, WireMessage, +}; + +/// Embedded shim TCL. Written to a temp file at worker startup and +/// passed to `vivado -source`. +const SHIM_TCL: &str = include_str!("../shim/vivado-shim.tcl"); + +/// How long to wait for the shim to connect back to our loopback +/// listener. 
Vivado's startup takes most of this on a cold cache; on
+/// a warm cache it's a few seconds.
+const SHIM_CONNECT_TIMEOUT: Duration = Duration::from_secs(180);
+
+/// Tag attached to each chunk a [`StdoutSink`] receives, so the
+/// caller can route it to the right UI lane. The shim's
+/// `puts`-interception path always produces [`StreamKind::Stdout`]
+/// — user TCL has no way to "label" a write. The PTY-line filter
+/// classifies Vivado's standard message format
+/// (`ERROR:`/`WARNING:`/`CRITICAL WARNING:`/`INFO:`) into the
+/// corresponding kind.
+///
+/// A consumer that doesn't care (e.g. `vw run` capturing for
+/// stdout pass-through) can ignore the kind and treat every chunk
+/// identically; the REPL uses it to colour error/warning lines.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum StreamKind {
+    /// User TCL `puts` output, or any other chunk we don't have a
+    /// reason to label otherwise. Default.
+    Stdout,
+    /// Vivado `INFO:` line — usually low-importance chatter from
+    /// the message system.
+    Info,
+    /// Vivado `WARNING:` / `CRITICAL WARNING:` line.
+    Warning,
+    /// Vivado `ERROR:` line. Distinct from the final
+    /// [`BackendError::Tcl`] returned by `eval` — these are
+    /// emitted *during* an eval and the final error often refers
+    /// back to them ("failed due to earlier errors").
+    Error,
+}
+
+/// Sink for streamed output during an eval. Called once per chunk
+/// the worker observes — from the shim's `puts` interception (Tcl
+/// user output) or from the PTY-line filter (Vivado's own message
+/// system). The [`StreamKind`] tags the chunk so the caller can
+/// route warnings and errors to a more attention-grabbing UI
+/// surface than ordinary stdout.
+// NOTE(review): the generic argument of `Box` is missing in this
+// copy of the patch. `set_stdout_sink` boxes a closure bounded by
+// `FnMut(StreamKind, &str) + Send + 'static` into this type, so it
+// is presumably the matching `dyn FnMut` — confirm against the
+// original file.
+pub type StdoutSink = Box;
+
+/// Spawn-time configuration for [`VivadoBackend`].
+// NOTE(review): the `Option` fields below have lost their type
+// argument in this copy of the patch (presumably `PathBuf`, which
+// the file imports) — confirm against the original file.
+#[derive(Clone, Debug, Default)]
+pub struct VivadoConfig {
+    /// Override the `vivado` executable path. If `None`, resolution
+    /// order is `$VW_VIVADO`, then a `vivado` lookup on `$PATH`.
+    pub vivado: Option,
+    /// Working directory for the spawned process. If `None`, a
+    /// scratch tempdir is created so Vivado's incidental files don't
+    /// litter the user's cwd.
+    pub working_dir: Option,
+    /// When `true`, forward Vivado's PTY output (banner, source-echo,
+    /// info messages) as it's produced. When `false` (default), the
+    /// bytes are read and discarded so they don't pollute either of
+    /// vw's output streams. User TCL `puts` is always captured per-
+    /// eval via the shim and streamed in the protocol, independent
+    /// of this setting.
+    ///
+    /// Where the verbose output goes depends on [`verbose_log`](Self::verbose_log):
+    /// when set, lines stream into that file; when unset, they go
+    /// to vw's stderr.
+    pub verbose: bool,
+    /// Optional path to a log file for verbose output. When set,
+    /// supersedes the default stderr destination — necessary for
+    /// the REPL, which owns the terminal in alternate-screen mode
+    /// and would corrupt the TUI rendering if anyone wrote raw
+    /// bytes to stderr mid-frame. The file is created (or
+    /// truncated) at spawn time and flushed per-line so it's safe
+    /// to `tail -f` from another terminal.
+    pub verbose_log: Option,
+}
+
+/// Vivado [`EdaBackend`] implementation.
+// NOTE(review): several field types below (`Option>`, `Box`,
+// `BufReader`, `UnboundedReceiver`, `Vec`, …) have lost their
+// generic argument lists in this copy of the patch. Most can be
+// inferred from how `spawn` fills them, but the handle type behind
+// `stdout_pump` is not visible here — restore all of them from the
+// original file rather than by inference.
+pub struct VivadoBackend {
+    child: Option>,
+    /// Master end of the PTY. Kept alive so the slave (Vivado) doesn't
+    /// receive EOF on its stdin.
+    _master: Box,
+    proto_read: BufReader,
+    proto_write: OwnedWriteHalf,
+    next_id: AtomicU64,
+    stdout_pump: Option>,
+    stdout_sink: Option,
+    /// Lines the PTY pump has read from Vivado's process stdout, in
+    /// arrival order. Drained during eval so Vivado's own message
+    /// system (ERROR/WARNING/CRITICAL WARNING/INFO) reaches the
+    /// stdout sink alongside user `puts` output — otherwise the
+    /// "earlier errors" Vivado refers to when failing a command are
+    /// invisible to the caller.
+    pty_rx: tokio::sync::mpsc::UnboundedReceiver,
+    /// Mirrors [`VivadoConfig::verbose`]. When true, PTY lines that
+    /// don't classify (banner, source-echo, idle chatter) are
+    /// surfaced — to [`verbose_log`](Self::verbose_log) if set,
+    /// otherwise to vw's stderr. Classified lines always route
+    /// through the message filter regardless of verbose.
+    verbose: bool,
+    /// Optional log file the verbose firehose streams into. The
+    /// REPL uses this so verbose output doesn't blow through its
+    /// TUI alternate screen by hitting stderr.
+    verbose_log: Option>,
+    /// Off by default. When true (set via the
+    /// `VW_TRACE_MESSAGE_SOURCES` env var at spawn time), emit a
+    /// gray `[vw-pty]` Info line before every classified PTY
+    /// chunk so the caller can see which path produced it.
+    /// Useful for diagnosing "where is this warning coming from?"
+    /// questions; noisy enough that it shouldn't be on by
+    /// default.
+    trace_message_sources: bool,
+    /// Brief-buffer classifier for multi-line PTY warnings. See
+    /// [`PtyClassifier`] for the merging semantics.
+    pty_classifier: PtyClassifier,
+    /// Stack frames the shim sent via `__VW_CTX_*` PTY markers
+    /// while the most recent `set_property` is in flight. When
+    /// non-empty, every Warning/Error chunk that lands here gets
+    /// these frames appended as `\n at <frame>` lines — that's
+    /// what lets the REPL show "this IP_Flow warning came from
+    /// configure_cips → create_versal_cips → set_property" even
+    /// though Vivado's C++ never went through our Tcl stack
+    /// capture.
+    active_pty_context: Vec,
+    /// Frames currently being assembled between
+    /// `__VW_CTX_BEGIN__` and `__VW_CTX_READY__`. Swapped into
+    /// `active_pty_context` atomically on READY so a partial
+    /// marker stream can't leak half-formed traces into emitted
+    /// warnings.
+    building_pty_context: Vec,
+    _shim_dir: TempDir,
+    _scratch_dir: Option,
+}
+
+impl VivadoBackend {
+    /// Spawn a Vivado worker under a PTY, wait for the shim to
+    /// connect back on our loopback listener, and return once we're
+    /// ready to accept [`EdaBackend::eval`] calls.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`BackendError::Io`] when a temp directory, the shim
+    /// file, the loopback listener, the accepted stream or the
+    /// verbose log cannot be set up, and [`BackendError::Worker`]
+    /// when the PTY cannot be opened, Vivado cannot be spawned, or
+    /// the shim does not connect within [`SHIM_CONNECT_TIMEOUT`].
+    // NOTE(review): the type arguments of the return type (`Result`)
+    // and of `unbounded_channel::()` below are missing in this copy
+    // of the patch — restore from the original file.
+    pub async fn spawn(config: VivadoConfig) -> Result {
+        let vivado_path = resolve_vivado(&config)?;
+
+        let shim_dir = tempfile::Builder::new()
+            .prefix("vw-vivado-shim-")
+            .tempdir()
+            .map_err(BackendError::Io)?;
+        let shim_path = shim_dir.path().join("vivado-shim.tcl");
+        tokio::fs::write(&shim_path, SHIM_TCL)
+            .await
+            .map_err(BackendError::Io)?;
+
+        let (cwd, scratch_dir) = match &config.working_dir {
+            Some(dir) => (dir.clone(), None),
+            None => {
+                let tmp = tempfile::Builder::new()
+                    .prefix("vw-vivado-cwd-")
+                    .tempdir()
+                    .map_err(BackendError::Io)?;
+                (tmp.path().to_path_buf(), Some(tmp))
+            }
+        };
+
+        // Port 0: the OS picks a free loopback port, which the shim
+        // learns through the VW_PROTOCOL_ADDR environment variable.
+        let listener = TcpListener::bind("127.0.0.1:0")
+            .await
+            .map_err(BackendError::Io)?;
+        let local_addr = listener.local_addr().map_err(BackendError::Io)?;
+        debug!(?vivado_path, ?shim_path, ?cwd, %local_addr, "spawning vivado worker");
+
+        let pty_system = native_pty_system();
+        let pair = pty_system
+            .openpty(PtySize {
+                rows: 24,
+                cols: 80,
+                pixel_width: 0,
+                pixel_height: 0,
+            })
+            .map_err(|e| {
+                BackendError::Worker(format!("openpty failed: {e}"))
+            })?;
+
+        // `-mode tcl` keeps Vivado alive as a long-running TCL
+        // interpreter; once the shim's socket loop takes over, Vivado
+        // never returns to its interactive prompt.
+        let mut cmd = CommandBuilder::new(&vivado_path);
+        cmd.arg("-mode");
+        cmd.arg("tcl");
+        cmd.arg("-nojournal");
+        cmd.arg("-nolog");
+        cmd.arg("-source");
+        cmd.arg(&shim_path);
+        cmd.env("VW_PROTOCOL_ADDR", local_addr.to_string());
+        cmd.cwd(&cwd);
+
+        let child = pair.slave.spawn_command(cmd).map_err(|e| {
+            BackendError::Worker(format!(
+                "failed to spawn vivado at {}: {}",
+                vivado_path.display(),
+                e
+            ))
+        })?;
+        // Release our handle to the slave so the master sees EOF
+        // when the child exits.
+        drop(pair.slave);
+
+        let reader = pair.master.try_clone_reader().map_err(|e| {
+            BackendError::Worker(format!("pty reader clone failed: {e}"))
+        })?;
+        let (pty_tx, pty_rx) = tokio::sync::mpsc::unbounded_channel::();
+        let stdout_pump = spawn_stdout_pump(reader, pty_tx);
+
+        // Wait for the shim to connect.
+        // NOTE(review): the early returns below drop `child` without
+        // an explicit kill — confirm that dropping the PTY handles is
+        // enough to take Vivado down on this path.
+        let accept_result =
+            tokio::time::timeout(SHIM_CONNECT_TIMEOUT, listener.accept()).await;
+        let stream = match accept_result {
+            Ok(Ok((stream, _peer))) => stream,
+            Ok(Err(e)) => return Err(BackendError::Io(e)),
+            Err(_) => {
+                return Err(BackendError::Worker(
+                    "timed out waiting for shim to connect".into(),
+                ));
+            }
+        };
+        stream.set_nodelay(true).map_err(BackendError::Io)?;
+        debug!("shim connected");
+
+        let (read_half, write_half) = stream.into_split();
+        // Tracing is on for any value other than empty, "0" or
+        // "false" (case-insensitive); an unset variable means off.
+        let trace_message_sources = std::env::var("VW_TRACE_MESSAGE_SOURCES")
+            .map(|v| {
+                let v = v.trim();
+                !v.is_empty() && v != "0" && !v.eq_ignore_ascii_case("false")
+            })
+            .unwrap_or(false);
+
+        let verbose_log = config
+            .verbose_log
+            .as_ref()
+            .map(|p| File::create(p).map(BufWriter::new))
+            .transpose()
+            .map_err(BackendError::Io)?;
+
+        Ok(Self {
+            child: Some(child),
+            _master: pair.master,
+            proto_read: BufReader::new(read_half),
+            proto_write: write_half,
+            next_id: AtomicU64::new(1),
+            stdout_pump: Some(stdout_pump),
+            stdout_sink: None,
+            pty_rx,
+            verbose: config.verbose,
+            verbose_log,
+            trace_message_sources,
+            pty_classifier: PtyClassifier::new(PTY_CONTINUATION_WINDOW),
+            active_pty_context: Vec::new(),
+            building_pty_context: Vec::new(),
+            _shim_dir: shim_dir,
+            _scratch_dir: scratch_dir,
+        })
+    }
+
+    /// Install a sink that's called per streaming chunk as output
+    /// is produced during eval. With a sink set, chunks are NOT
+    /// also accumulated into [`EvalOutput::stdout`] — the sink owns
+    /// the data, and the caller is expected to display or persist
+    /// it directly. The [`StreamKind`] argument tags each chunk
+    /// (user `puts` vs. Vivado's WARNING/ERROR/INFO messages) so
+    /// the caller can route the chunk to the appropriate UI lane.
+    pub fn set_stdout_sink<F>(&mut self, sink: F)
+    where
+        F: FnMut(StreamKind, &str) + Send + 'static,
+    {
+        self.stdout_sink = Some(Box::new(sink));
+    }
+
+    /// Hand out the next request id from the per-backend counter.
+    fn alloc_id(&self) -> u64 {
+        self.next_id.fetch_add(1, Ordering::Relaxed)
+    }
+
+    /// Serialize `req` as one newline-terminated JSON line and write
+    /// it to the shim's protocol socket.
+    async fn write_request(
+        &mut self,
+        req: &Request,
+    ) -> Result<(), BackendError> {
+        let mut line = serde_json::to_string(req)?;
+        line.push('\n');
+        self.proto_write.write_all(line.as_bytes()).await?;
+        self.proto_write.flush().await?;
+        Ok(())
+    }
+
+    /// Read messages until we get the response that matches
+    /// `expected_id`. Stream notifications for the same id are routed
+    /// to [`Self::stdout_sink`] if set, or accumulated into the
+    /// returned `String` if not.
+    ///
+    /// While waiting, the worker also drains the PTY channel — so
+    /// Vivado's own `send_msg_id` output (ERROR/WARNING/CRITICAL
+    /// WARNING/INFO lines printed to the process stdout, not through
+    /// the shim's `puts` interception) reaches the same sink and
+    /// gets attributed to the in-flight eval. Without this, the
+    /// "earlier errors" Vivado refers to when a command fails are
+    /// invisible to the caller.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`BackendError::Io`] when the socket read fails or a
+    /// protocol line is not valid UTF-8, and [`BackendError::Worker`]
+    /// when the shim closes the socket or sends a line that does not
+    /// parse as a [`WireMessage`].
+    async fn read_response_for(
+        &mut self,
+        expected_id: u64,
+    ) -> Result<(Response, String), BackendError> {
+        let mut accumulated = String::new();
+        // Bytes of the protocol line currently being assembled. It
+        // is only emptied once a full line has been taken out of it:
+        // when the PTY branch wins the `select!` below, the read
+        // future is dropped, and `read_until` leaves whatever it had
+        // already pulled off the socket in this buffer for the next
+        // call to extend. (`read_line` gives no such guarantee, so a
+        // PTY line arriving mid-read could drop part of a protocol
+        // line.)
+        let mut raw = Vec::new();
+        // Cleared once the PTY channel reports it is closed. A closed
+        // channel is "ready" with `None` on every poll; left enabled
+        // in a `biased` select it would win every iteration and the
+        // protocol socket would never be polled again.
+        let mut pty_open = true;
+        loop {
+            // Race the protocol socket against the PTY channel. The
+            // protocol path eventually terminates the loop (a
+            // Response arrives); PTY lines are forwarded as
+            // best-effort context until then.
+            tokio::select! {
+                biased;
+                pty = self.pty_rx.recv(), if pty_open => {
+                    match pty {
+                        Some(pty_line) => self.handle_pty_line_during_eval(
+                            &pty_line,
+                            &mut accumulated,
+                        ),
+                        // Pump exited: Vivado's PTY closed.
+                        // Stop trying to drain it but keep waiting
+                        // for a Response on the protocol socket —
+                        // most teardown sequences send the
+                        // shutdown ack before the PTY EOFs.
+                        None => pty_open = false,
+                    }
+                    continue;
+                }
+                read = self.proto_read.read_until(b'\n', &mut raw) => {
+                    let n = read.map_err(BackendError::Io)?;
+                    if n == 0 {
+                        return Err(BackendError::Worker(
+                            "vivado shim closed protocol socket".into(),
+                        ));
+                    }
+                }
+            }
+            // A complete line is in hand: take it, leaving `raw`
+            // empty for the next one. Invalid UTF-8 is reported the
+            // way `read_line` would have reported it.
+            let bytes = std::mem::take(&mut raw);
+            let line = String::from_utf8(bytes).map_err(|e| {
+                BackendError::Io(std::io::Error::new(
+                    std::io::ErrorKind::InvalidData,
+                    e,
+                ))
+            })?;
+            let trimmed = line.trim();
+            if trimmed.is_empty() {
+                continue;
+            }
+            let msg: WireMessage =
+                serde_json::from_str(trimmed).map_err(|e| {
+                    BackendError::Worker(format!(
+                        "malformed message from shim: {e}; payload={trimmed}"
+                    ))
+                })?;
+            match msg {
+                WireMessage::Stream(s) if s.id == expected_id => {
+                    if let Some(sink) = self.stdout_sink.as_mut() {
+                        // Default: shim-stream chunks are user
+                        // `puts` output (StreamKind::Stdout). But
+                        // our send_msg_id override in the shim
+                        // also emits via this path — those chunks
+                        // start with a Vivado-standard severity
+                        // prefix (`WARNING:`/`ERROR:`/etc.) which
+                        // we re-use the PTY-line classifier to
+                        // detect.
+                        let kind = classify_chunk_for_sink(&s.data);
+                        // Provenance marker (opt-in via
+                        // VW_TRACE_MESSAGE_SOURCES), only on
+                        // classified (non-Stdout) chunks — plain
+                        // user output doesn't benefit from a
+                        // "came via the shim" tag, but a warning
+                        // does so the user can tell it from a
+                        // PTY-routed one.
+                        if self.trace_message_sources
+                            && kind != StreamKind::Stdout
+                        {
+                            sink(
+                                StreamKind::Info,
+                                &format!(
+                                    "[vw-shim-stream] \
+                                     classified-as={kind:?}\n"
+                                ),
+                            );
+                        }
+                        sink(kind, &s.data);
+                    } else {
+                        accumulated.push_str(&s.data);
+                    }
+                }
+                WireMessage::Stream(s) => {
+                    warn!(
+                        got = s.id,
+                        expected = expected_id,
+                        "stream id mismatch; discarding"
+                    );
+                }
+                WireMessage::Response(r) if r.id == expected_id => {
+                    // Force-flush any buffered PTY message — a
+                    // classified line that arrived right before
+                    // the Vivado response would otherwise linger
+                    // in the classifier until the next eval's
+                    // drain. Flushing here means the user always
+                    // sees every classified message attributable
+                    // to the eval before the eval's result.
+                    if let Some((kind, text)) = self.pty_classifier.flush() {
+                        self.emit_pty_chunk(kind, &text, &mut accumulated);
+                    }
+                    return Ok((r, accumulated));
+                }
+                WireMessage::Response(r) => {
+                    warn!(
+                        got = r.id,
+                        expected = expected_id,
+                        "response id mismatch; discarding"
+                    );
+                }
+            }
+        }
+    }
+
+    /// Filter a PTY line received during an in-flight eval. Lines
+    /// matching Vivado's standard message format are forwarded to
+    /// the stdout sink (or accumulated when there's no sink);
+    /// everything else (banner, source-echo, idle chatter) is
+    /// dropped — or stderr-mirrored when `verbose` is set, so a
+    /// user diagnosing a flaky eval can still get the full firehose.
+    fn handle_pty_line_during_eval(
+        &mut self,
+        line: &str,
+        accumulated: &mut String,
+    ) {
+        if self.consume_ctx_marker(line) {
+            return;
+        }
+        let outcome =
+            self.pty_classifier.handle(line, std::time::Instant::now());
+        for (kind, text) in outcome.chunks {
+            self.emit_pty_chunk(kind, &text, accumulated);
+        }
+        if !outcome.absorbed && self.verbose {
+            self.write_verbose_line(line);
+        }
+    }
+
+    /// Recognize one of the `__VW_CTX_*` lines the shim emits
+    /// around `set_property`.
Returns `true` if the line was a + /// marker (and should be swallowed); `false` if it's a normal + /// PTY line for the classifier. + fn consume_ctx_marker(&mut self, line: &str) -> bool { + let stripped = line.trim_end_matches(['\r', '\n']); + match stripped { + "__VW_CTX_BEGIN__" => { + self.building_pty_context.clear(); + true + } + "__VW_CTX_READY__" => { + self.active_pty_context = + std::mem::take(&mut self.building_pty_context); + true + } + "__VW_CTX_END__" => { + self.active_pty_context.clear(); + self.building_pty_context.clear(); + true + } + _ => { + if let Some(frame) = stripped.strip_prefix("__VW_CTX_FRAME__:") + { + self.building_pty_context.push(frame.to_string()); + true + } else { + false + } + } + } + } + + /// Drain whatever PTY lines have queued up between evals. Two + /// classes of line show up here in practice: + /// + /// 1. **Shim startup logs** (`[vw-shim] ...`) — emitted by + /// Vivado-startup-time code before the user's first eval. + /// Routed via the sink as Info so the user can see + /// whether the send_msg_id override installed. + /// 2. **Vivado messages emitted between evals** (e.g., delayed + /// `WARNING:` from a previous eval's async work, or + /// initialization messages fired by Vivado without any + /// eval in flight). Same deal — route to sink so the user + /// sees them in scrollback. + /// + /// Unclassified lines (banner, source-echo, the Vivado prompt) + /// still drop on the floor — or stderr-mirror if `verbose`. + /// Forwarding those would flood scrollback. + fn drain_pty_between_evals(&mut self) { + // Force-flush any pending PTY message from the previous + // eval first — if the eval ended right after a classified + // line and before any continuation could arrive, we want + // it surfaced before whatever the drain finds. 
+ let mut sink_void = String::new(); + if let Some((kind, text)) = self.pty_classifier.flush() { + self.emit_pty_chunk(kind, &text, &mut sink_void); + } + while let Ok(line) = self.pty_rx.try_recv() { + if self.consume_ctx_marker(&line) { + continue; + } + let outcome = + self.pty_classifier.handle(&line, std::time::Instant::now()); + for (kind, text) in outcome.chunks { + self.emit_pty_chunk(kind, &text, &mut sink_void); + } + if !outcome.absorbed && self.verbose { + self.write_verbose_line(&line); + } + } + // Flush again at end of drain — a classified line that + // landed right before the drain stopped might still be + // buffered. No new lines will arrive before the next + // eval's read_response_for, so we'd rather surface this + // now than wait. + if let Some((kind, text)) = self.pty_classifier.flush() { + self.emit_pty_chunk(kind, &text, &mut sink_void); + } + } + + /// Write one verbose-firehose line — to the log file when + /// configured, otherwise to vw's stderr. Used for unclassified + /// PTY lines we'd otherwise discard. Errors silently because + /// dropping a verbose line shouldn't break the eval. + fn write_verbose_line(&mut self, line: &str) { + if let Some(w) = self.verbose_log.as_mut() { + let _ = writeln!(w, "{line}"); + let _ = w.flush(); + } else { + let _ = writeln!(std::io::stderr(), "{line}"); + } + } + + /// Emit one classified PTY chunk: the optional gray provenance + /// marker (when `trace_message_sources` is on) followed by the + /// chunk itself. Used by both the in-eval and between-eval + /// classification paths so the marker / sink-vs-accumulator + /// rule lives in exactly one place. + /// + /// `[vw-*]` self-diagnostic chunks (shim install logs, future + /// internal tracers) are suppressed unless trace is on. Most + /// users don't care that the shim connected to a port and + /// installed an override — that's housekeeping noise. 
When + /// something goes wrong, set `VW_TRACE_MESSAGE_SOURCES=1` to + /// surface both these chunks AND the per-message provenance + /// markers. + fn emit_pty_chunk( + &mut self, + kind: StreamKind, + text: &str, + accumulated: &mut String, + ) { + if !self.trace_message_sources && is_vw_log_chunk(text) { + return; + } + // Tag warnings/errors that arrived without a trace with the + // current `set_property` context (frames captured by the + // shim around the in-flight C++ call). This is the path + // that resolves "IP_Flow 19-7090" and friends — they go + // straight from Vivado's C++ to the PTY, bypassing every + // Tcl-side stack-capture hook. + let tagged: String; + let payload: &str = + if matches!(kind, StreamKind::Warning | StreamKind::Error) + && !self.active_pty_context.is_empty() + && !text.contains("\n at ") + { + let trimmed = text.trim_end_matches('\n'); + let mut buf = String::with_capacity(text.len() + 80); + buf.push_str(trimmed); + for frame in &self.active_pty_context { + buf.push_str("\n at "); + buf.push_str(frame); + } + // Restore the trailing newline if the caller had one + // — downstream chunk handling assumes line-terminated. + if text.ends_with('\n') { + buf.push('\n'); + } + tagged = buf; + &tagged + } else { + text + }; + if let Some(sink) = self.stdout_sink.as_mut() { + if self.trace_message_sources { + sink( + StreamKind::Info, + &format!("[vw-pty] classified-as={kind:?}\n"), + ); + } + sink(kind, payload); + } else { + accumulated.push_str(payload); + } + } +} + +/// True when the chunk's first non-whitespace content matches one +/// of our `[vw-*]` self-diagnostic prefixes (see [`VW_LOG_PREFIXES`]). +/// Used by [`VivadoBackend::emit_pty_chunk`] to suppress these +/// chunks when trace isn't enabled. 
+pub(crate) fn is_vw_log_chunk(text: &str) -> bool { + let trimmed = text.trim_start(); + VW_LOG_PREFIXES.iter().any(|p| trimmed.starts_with(p)) +} + +#[async_trait] +impl EdaBackend for VivadoBackend { + fn name(&self) -> &str { + "vivado" + } + + async fn eval(&mut self, tcl: &str) -> Result { + self.drain_pty_between_evals(); + let id = self.alloc_id(); + let req = Request { + id, + op: RequestOp::Eval { tcl: tcl.into() }, + }; + self.write_request(&req).await?; + let (resp, stdout) = self.read_response_for(id).await?; + match resp.result { + ResponseResult::Ok { result, .. } => { + let value = match result { + serde_json::Value::String(s) => s, + other => other.to_string(), + }; + Ok(EvalOutput { value, stdout }) + } + ResponseResult::Err { error, .. } => Err(BackendError::Tcl { + message: error.message, + code: error.code, + info: error.info, + stdout, + }), + } + } + + async fn send( + &mut self, + mut request: Request, + ) -> Result { + self.drain_pty_between_evals(); + if request.id == 0 { + request.id = self.alloc_id(); + } + let id = request.id; + self.write_request(&request).await?; + let (resp, _stdout) = self.read_response_for(id).await?; + Ok(resp) + } + + async fn shutdown(&mut self) -> Result<(), BackendError> { + if self.child.is_none() { + return Ok(()); + } + let id = self.alloc_id(); + let req = Request { + id, + op: RequestOp::Shutdown, + }; + let _ = self.write_request(&req).await; + let _ = self.read_response_for(id).await; + if let Some(mut child) = self.child.take() { + // Vivado's tear-down is slow; bound it. 
+ let waited = tokio::task::spawn_blocking(move || { + let deadline = + std::time::Instant::now() + Duration::from_secs(10); + loop { + match child.try_wait() { + Ok(Some(status)) => return Ok(status), + Ok(None) => { + if std::time::Instant::now() >= deadline { + let _ = child.kill(); + return child.wait(); + } + std::thread::sleep(Duration::from_millis(100)); + } + Err(e) => return Err(e), + } + } + }) + .await; + match waited { + Ok(Ok(status)) => debug!(?status, "vivado exited"), + Ok(Err(e)) => return Err(BackendError::Io(e)), + Err(e) => warn!(?e, "vivado wait join error"), + } + } + if let Some(handle) = self.stdout_pump.take() { + let _ = handle.join(); + } + Ok(()) + } +} + +impl Drop for VivadoBackend { + fn drop(&mut self) { + if let Some(mut child) = self.child.take() { + let _ = child.kill(); + } + // The pump thread will exit on its own when the PTY master is + // dropped and the read returns EOF. + } +} + +/// Pump Vivado's PTY output in the background. +/// +/// Pump Vivado's process stdout into the worker as a stream of +/// newline-split lines. Runs on a blocking std thread because +/// `portable_pty` only exposes a synchronous `Read`. +/// +/// We always *read* the bytes — otherwise the PTY backpressures and +/// Vivado eventually blocks. Splitting on `\n` here (rather than +/// shipping raw chunks) keeps line semantics consistent for the +/// downstream message-line filter, which works one line at a time. +/// `\r` is stripped — Vivado's PTY output sometimes contains CRLF. +/// +/// The thread exits when the PTY closes (Vivado died) or the +/// receiver is dropped (the worker shut down). 
+fn spawn_stdout_pump( + mut reader: Box, + tx: tokio::sync::mpsc::UnboundedSender, +) -> std::thread::JoinHandle<()> { + std::thread::spawn(move || { + let mut buf = [0u8; 4096]; + let mut line = String::new(); + loop { + match reader.read(&mut buf) { + Ok(0) => break, + Ok(n) => { + for &b in &buf[..n] { + if b == b'\n' { + let send = std::mem::take(&mut line); + if tx.send(send).is_err() { + return; + } + } else if b != b'\r' { + // Best-effort UTF-8 — replace bytes that + // aren't valid mid-line. Vivado's stdout + // is ASCII in practice. + line.push(b as char); + } + } + } + Err(e) => { + debug!(error = %e, "pty read error"); + break; + } + } + } + // Flush any partial trailing line so EOF doesn't swallow the + // last unterminated message. + if !line.is_empty() { + let _ = tx.send(line); + } + }) +} + +/// Classify `line` as a Vivado standard-format message and +/// translate the prefix into the [`StreamKind`] the sink should +/// receive. Returns `None` for lines that don't match the +/// `common::send_msg_id` prefix set — those are banner / +/// source-echo / idle chatter and don't reach the sink. +/// +/// Conservative on purpose: a false-negative just means a useful +/// line is dropped (recoverable with `verbose=true` for users who +/// need the full firehose), but a false-positive injects banner / +/// source-echo noise into every eval's output, which would degrade +/// the REPL experience for everyone. Leading whitespace is allowed +/// because Vivado occasionally indents within scripted blocks. +/// Classify a multi-line shim-stream chunk for sink routing. The +/// chunk's first line determines its kind: a Vivado-standard +/// severity prefix routes to the matching [`StreamKind`]; +/// anything else falls back to [`StreamKind::Stdout`] (the +/// chunk is treated as user `puts` output). 
+/// +/// Continuation lines (the `at file:line in proc` frames the +/// send_msg_id override appends) inherit the first-line kind by +/// virtue of being part of the same chunk. The downstream +/// renderer treats each chunk as one scrollback entry. +pub(crate) fn classify_chunk_for_sink(chunk: &str) -> StreamKind { + let first = chunk.lines().next().unwrap_or(""); + classify_vivado_message_line(first).unwrap_or(StreamKind::Stdout) +} + +pub(crate) fn classify_vivado_message_line(line: &str) -> Option { + let l = line.trim_start(); + // `CRITICAL WARNING:` must be checked BEFORE `WARNING:` because + // the latter is a prefix of the former when leading whitespace + // is trimmed. + if l.starts_with("ERROR:") { + Some(StreamKind::Error) + } else if l.starts_with("CRITICAL WARNING:") || l.starts_with("WARNING:") { + Some(StreamKind::Warning) + } else if l.starts_with("INFO:") { + Some(StreamKind::Info) + } else if VW_LOG_PREFIXES.iter().any(|p| l.starts_with(p)) { + // Our own diagnostics — see [`VW_LOG_PREFIXES`] for the + // canonical list. All route as Info: they're gray "where + // did this come from" markers, not warnings. The + // allowlist (rather than a generic `starts_with("[vw-")`) + // prevents a user's `puts "[vw-mystuff] hi"` from getting + // accidentally absorbed. + Some(StreamKind::Info) + } else { + None + } +} + +/// Allowlist of prefixes our shim and worker emit for self- +/// diagnostics. Any line starting with one of these classifies +/// as [`StreamKind::Info`]. +pub(crate) const VW_LOG_PREFIXES: &[&str] = + &["[vw-shim]", "[vw-pty]", "[vw-shim-stream]"]; + +/// Window during which a classified PTY line will absorb a +/// following unclassified line as a continuation. Vivado +/// occasionally emits multi-line messages where the severity +/// prefix only sits on the first line; treating an +/// immediately-following unclassified line as part of the same +/// message renders the warning as one scrollback entry instead +/// of two. 
+/// +/// 20ms is well above the inter-line latency our PTY pump sees +/// for a single Vivado write (which is sub-millisecond) but +/// well below human reaction time, so a real follow-up message +/// from a *different* call site can't be misattributed. +pub(crate) const PTY_CONTINUATION_WINDOW: std::time::Duration = + std::time::Duration::from_millis(20); + +/// Per-message buffer the worker uses to merge a multi-line PTY +/// warning into one chunk. See [`PtyClassifier`] for the merge +/// semantics. +#[derive(Debug, Clone)] +struct PendingPtyMessage { + kind: StreamKind, + text: String, + arrived_at: std::time::Instant, +} + +/// Outcome of feeding one PTY line through [`PtyClassifier`]. +#[derive(Debug, Default)] +pub(crate) struct ClassifyOutcome { + /// Chunks ready for the sink (or accumulator) in arrival + /// order. At most one *new* classified chunk per call; an + /// additional preceding entry appears only when this call + /// flushed a previously-pending message (either because a + /// new classified line arrived or the window expired). + pub chunks: Vec<(StreamKind, String)>, + /// True when the classifier took responsibility for the + /// input line (stored it as pending, or appended it to a + /// pending message). False when the line was an unclassified + /// non-continuation — caller may stderr-mirror it. + pub absorbed: bool, +} + +/// Brief-buffer classifier for PTY lines. Holds one classified +/// message at a time; an unclassified line arriving within +/// [`PTY_CONTINUATION_WINDOW`] gets folded into it, so a multi- +/// line Vivado warning whose first line carries the severity +/// prefix renders as a single chunk (and thus a single scrollback +/// entry on the App side). +/// +/// Pure / time-injected so it's unit-testable without setting up +/// a worker. 
+#[derive(Debug)] +pub(crate) struct PtyClassifier { + pending: Option, + window: std::time::Duration, +} + +impl PtyClassifier { + pub fn new(window: std::time::Duration) -> Self { + Self { + pending: None, + window, + } + } + + /// Feed one PTY line. `now` is when the line arrived (taken + /// as a parameter so tests can drive the clock). + pub fn handle( + &mut self, + line: &str, + now: std::time::Instant, + ) -> ClassifyOutcome { + let mut out = ClassifyOutcome::default(); + if let Some(kind) = classify_vivado_message_line(line) { + // A new classified line starts a new pending. Flush + // whatever was pending first — same path as the + // window-expired case below. + if let Some(prev) = self.pending.take() { + out.chunks + .push((prev.kind, with_trailing_newline(&prev.text))); + } + self.pending = Some(PendingPtyMessage { + kind, + text: line.to_string(), + arrived_at: now, + }); + out.absorbed = true; + return out; + } + // Unclassified. Maybe a continuation of the current + // pending warning/error? We only fold for Warning/Error + // kinds because Info messages (`[vw-shim] ...` and + // Vivado `INFO:`) are always single-line in practice — + // and absorbing into them swallowed Vivado's source-echo + // of our shim script (`# catch {...}`, `# while {1} {`, + // etc.) which arrived inside the window during boot. + // Restricting to Warning|Error covers the case we + // actually care about (Vivado occasionally emits multi- + // line WARNING/ERROR text with `\n` between the header + // and a body) without the noise. + if let Some(p) = self.pending.as_mut() { + let merges = + matches!(p.kind, StreamKind::Warning | StreamKind::Error); + if merges && now.duration_since(p.arrived_at) < self.window { + p.text.push('\n'); + p.text.push_str(line); + // Refresh the arrival time so a chain of + // continuation lines all qualifies, not just the + // first. 
+ p.arrived_at = now; + out.absorbed = true; + return out; + } + // Either kind doesn't merge, or the window expired — + // flush the pending. The current line itself is not + // absorbed; caller may stderr-mirror it. + let p = self.pending.take().unwrap(); + out.chunks.push((p.kind, with_trailing_newline(&p.text))); + } + out + } + + /// Force-flush any pending message. Called at eval end so a + /// message buffered right before the Vivado response doesn't + /// linger unseen. + pub fn flush(&mut self) -> Option<(StreamKind, String)> { + self.pending + .take() + .map(|p| (p.kind, with_trailing_newline(&p.text))) + } +} + +fn with_trailing_newline(s: &str) -> String { + if s.ends_with('\n') { + s.to_string() + } else { + format!("{s}\n") + } +} + +fn resolve_vivado(config: &VivadoConfig) -> Result { + if let Some(path) = &config.vivado { + return Ok(path.clone()); + } + if let Ok(env) = std::env::var("VW_VIVADO") { + if !env.is_empty() { + return Ok(PathBuf::from(env)); + } + } + if let Some(paths) = std::env::var_os("PATH") { + for dir in std::env::split_paths(&paths) { + let candidate = dir.join("vivado"); + if candidate.is_file() { + return Ok(candidate); + } + } + } + Err(BackendError::Worker( + "could not find `vivado` on PATH; set $VW_VIVADO or pass \ + VivadoConfig::vivado" + .into(), + )) +} + +#[cfg(test)] +mod tests { + use std::time::{Duration, Instant}; + + use super::{ + classify_chunk_for_sink, classify_vivado_message_line, is_vw_log_chunk, + PtyClassifier, StreamKind, + }; + + fn classifier(window_ms: u64) -> PtyClassifier { + PtyClassifier::new(Duration::from_millis(window_ms)) + } + + #[test] + fn classifier_emits_classified_line_only_on_flush() { + // A classified line gets buffered, not emitted, until + // either a new classified line arrives or flush() is + // called. This is what enables the multi-line-message + // merge that follows. 
+ let mut c = classifier(20); + let t0 = Instant::now(); + let out = c.handle("WARNING: [X 1-1] hi", t0); + assert!(out.chunks.is_empty(), "{:?}", out.chunks); + assert!(out.absorbed); + let flushed = c.flush().expect("pending must flush"); + assert_eq!(flushed.0, StreamKind::Warning); + assert_eq!(flushed.1, "WARNING: [X 1-1] hi\n"); + } + + #[test] + fn classifier_absorbs_unclassified_continuation_within_window() { + let mut c = classifier(20); + let t0 = Instant::now(); + let out = c.handle("WARNING: [X 1-1] header", t0); + assert!(out.absorbed); + let out = c.handle("second body line", t0 + Duration::from_millis(5)); + assert!(out.chunks.is_empty(), "{:?}", out.chunks); + assert!(out.absorbed); + // A third continuation works too (window refreshes on + // each absorb). + let out = c.handle("third line", t0 + Duration::from_millis(15)); + assert!(out.chunks.is_empty(), "{:?}", out.chunks); + assert!(out.absorbed); + let (kind, text) = c.flush().unwrap(); + assert_eq!(kind, StreamKind::Warning); + assert_eq!( + text, + "WARNING: [X 1-1] header\nsecond body line\nthird line\n" + ); + } + + #[test] + fn classifier_flushes_pending_when_window_expires() { + let mut c = classifier(20); + let t0 = Instant::now(); + let out = c.handle("WARNING: [X 1-1] one", t0); + assert!(out.absorbed); + // Unclassified line arrives well past the window: + // pending flushes, line itself is reported as + // not-absorbed so the caller may stderr-mirror it. + let out = c.handle("a", t0 + Duration::from_millis(50)); + assert_eq!(out.chunks.len(), 1); + assert_eq!(out.chunks[0].0, StreamKind::Warning); + assert_eq!(out.chunks[0].1, "WARNING: [X 1-1] one\n"); + assert!(!out.absorbed); + // Nothing further pending. + assert!(c.flush().is_none()); + } + + #[test] + fn classifier_flushes_previous_pending_on_new_classified() { + // Two classified lines back-to-back: the first flushes + // as soon as the second arrives, and the second becomes + // the new pending. 
+ let mut c = classifier(20); + let t0 = Instant::now(); + c.handle("WARNING: [X 1-1] one", t0); + let out = c.handle("ERROR: [Y 1-1] two", t0 + Duration::from_millis(5)); + assert_eq!(out.chunks.len(), 1); + assert_eq!(out.chunks[0].0, StreamKind::Warning); + assert_eq!(out.chunks[0].1, "WARNING: [X 1-1] one\n"); + assert!(out.absorbed); + let (kind, text) = c.flush().unwrap(); + assert_eq!(kind, StreamKind::Error); + assert_eq!(text, "ERROR: [Y 1-1] two\n"); + } + + #[test] + fn vw_log_chunk_detection() { + // Recognized chunks for emit-time suppression. + assert!(is_vw_log_chunk( + "[vw-shim] installed send_msg_id override\n" + )); + assert!(is_vw_log_chunk("[vw-pty] classified-as=Warning\n")); + assert!(is_vw_log_chunk(" [vw-shim-stream] classified-as=Error\n")); + // Real message content stays — never accidentally + // suppress a Vivado warning that mentions our tag in its + // body. + assert!(!is_vw_log_chunk("WARNING: [Common 17-1] no\n")); + assert!(!is_vw_log_chunk( + "INFO: [Common 17-1] something about [vw-shim]\n" + )); + assert!(!is_vw_log_chunk("")); + } + + #[test] + fn classifier_info_kind_does_not_absorb_continuations() { + // Regression guard: Info-kind pending (typically a + // [vw-shim] log line or a Vivado INFO line) must NOT + // absorb subsequent unclassified lines, because those + // are almost always unrelated content arriving in the + // same time window (Vivado source-echo during boot, + // banner lines, etc.). Only Warning/Error kinds merge. + let mut c = classifier(20); + let t0 = Instant::now(); + c.handle("[vw-shim] installed send_msg_id override", t0); + let out = c.handle( + "# catch {::vw::install_send_msg_override}", + t0 + Duration::from_millis(5), + ); + // The pending Info flushed as its own chunk; the source- + // echo line was NOT absorbed. 
+ assert_eq!(out.chunks.len(), 1); + assert_eq!(out.chunks[0].0, StreamKind::Info); + assert_eq!( + out.chunks[0].1, + "[vw-shim] installed send_msg_id override\n" + ); + assert!(!out.absorbed); + } + + #[test] + fn classifier_drops_unclassified_lines_when_no_pending() { + let mut c = classifier(20); + let out = c.handle("plain output, no prefix", Instant::now()); + assert!(out.chunks.is_empty()); + assert!(!out.absorbed); + assert!(c.flush().is_none()); + } + + #[test] + fn classifies_each_standard_prefix_to_its_stream_kind() { + let cases = [ + ( + "ERROR: [Common 17-53] No open project. ...", + StreamKind::Error, + ), + ( + "WARNING: [Coretcl 2-1184] no open project", + StreamKind::Warning, + ), + ( + "CRITICAL WARNING: [Vivado 12-180] ...", + // Critical warnings are still warnings as far as + // the UI is concerned — orange, not red. The + // `CRITICAL` prefix is preserved in the line + // content for the user to see. + StreamKind::Warning, + ), + ( + "INFO: [Vivado 12-3661] auto-pinning enabled", + StreamKind::Info, + ), + ]; + for (line, expected) in cases { + assert_eq!( + classify_vivado_message_line(line), + Some(expected), + "wrong classification for: {line}" + ); + } + } + + #[test] + fn classifies_lines_with_leading_whitespace() { + // Scripted blocks sometimes indent messages — still a + // Vivado-formatted line, still useful to surface. 
+ assert_eq!( + classify_vivado_message_line(" ERROR: [X 1-2] indented"), + Some(StreamKind::Error) + ); + assert_eq!( + classify_vivado_message_line("\tWARNING: [X 1-2] tabbed"), + Some(StreamKind::Warning) + ); + } + + #[test] + fn drops_banner_and_source_echo_and_chatter() { + for line in [ + "", + "Vivado v2024.2 (64-bit)", + "SW Build 5095499 on Wed Nov 13 22:37:05 MST 2024", + "Copyright 1986-2024 Xilinx, Inc.", + "Vivado% ", // The interactive prompt + "source /tmp/vw-vivado-shim/vivado-shim.tcl -notrace", + "create_bd_design metroid", + "errors not at start of line: ERROR: foo", + // Has the substring but not at the start — looks like + // shell or log output, not a message-system line. + "[2024-01-01 12:00] INFO: forwarded by other tool", + ] { + assert_eq!( + classify_vivado_message_line(line), + None, + "should drop: {line:?}" + ); + } + } + + #[test] + fn does_not_match_partial_prefixes() { + // `ERRORS:` would be a hypothetical other label and we + // shouldn't false-positive on it. + assert_eq!(classify_vivado_message_line("ERRORS: bogus prefix"), None); + assert_eq!( + classify_vivado_message_line("INFOMERCIAL: not a message"), + None + ); + } + + #[test] + fn plain_chunk_routes_to_stdout() { + // User `puts hi` produces an ordinary stdout chunk — + // nothing matches the Vivado prefix set so it falls + // through to Stdout. + assert_eq!(classify_chunk_for_sink("hi\n"), StreamKind::Stdout); + assert_eq!( + classify_chunk_for_sink("progress: 42%\n"), + StreamKind::Stdout + ); + assert_eq!(classify_chunk_for_sink(""), StreamKind::Stdout); + } + + #[test] + fn classified_chunk_with_stack_inherits_first_line_kind() { + // The exact shape our send_msg_id override produces: a + // severity-prefixed first line followed by `at ...` + // continuation frames. The whole chunk should route to + // the kind matching the first line, so the warning and + // its stack stay together as one orange (or red) entry. 
+ let warning_with_stack = "WARNING: [Common 17-1496] tclapp out of date\n\ + \x20\x20at /opt/Vivado/foo.tcl:42 in ::tclapp::loader\n\ + \x20\x20at /opt/Vivado/init.tcl:10\n"; + assert_eq!( + classify_chunk_for_sink(warning_with_stack), + StreamKind::Warning + ); + + let error_with_stack = "ERROR: [BD 5-148] no open project\n\ + \x20\x20at /opt/Vivado/bd.tcl:99 in ::bd::create\n"; + assert_eq!( + classify_chunk_for_sink(error_with_stack), + StreamKind::Error + ); + } + + #[test] + fn shim_log_lines_route_as_info() { + // Our shim's own log lines start with `[vw-shim]`. They're + // diagnostic-level info — we don't want them to look like + // a hot error, just a "here's what the worker said" + // notice in the scrollback. + assert_eq!( + classify_vivado_message_line( + "[vw-shim] installed send_msg_id override" + ), + Some(StreamKind::Info) + ); + assert_eq!( + classify_vivado_message_line( + "[vw-shim] ::common::send_msg_id not present; skipping override" + ), + Some(StreamKind::Info) + ); + // The matcher uses an explicit allowlist — see + // VW_LOG_PREFIXES. Each member routes as Info; anything + // outside the list does NOT match, even when it bears + // the `[vw-` shape. + for prefix in super::VW_LOG_PREFIXES { + let line = format!("{prefix} something"); + assert_eq!( + classify_vivado_message_line(&line), + Some(StreamKind::Info), + "should classify our known prefix: {prefix}" + ); + } + // Look-alike inside our namespace that we DIDN'T sanction + // (a future shim subsystem nobody added to the allowlist, + // or a user's puts that happens to bracket `[vw-*]`) is + // rejected — keeps the classifier conservative. 
+ assert_eq!(classify_vivado_message_line("[vw-mystuff] foo"), None); + assert_eq!(classify_vivado_message_line("[other] foo"), None); + } + + #[test] + fn classified_chunk_with_leading_indent_still_routes() { + // `classify_vivado_message_line` tolerates leading + // whitespace; `classify_chunk_for_sink` should inherit + // that — Vivado occasionally indents scripted messages. + let chunk = " WARNING: [X 1-2] indented\n at foo:1\n"; + assert_eq!(classify_chunk_for_sink(chunk), StreamKind::Warning); + } +}