From 210277d477b021b4dbd8d35cf1c855ea2b2a6771 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Sat, 4 Jul 2026 18:38:43 -0500 Subject: [PATCH] feat(fetcher): add optional rakers rendering --- CHANGELOG.md | 4 + Cargo.lock | 992 ++++++++++++++++++++++-- Cargo.toml | 3 + README.md | 18 + crates/fetchkit-cli/Cargo.toml | 1 + crates/fetchkit-cli/src/main.rs | 24 + crates/fetchkit/Cargo.toml | 2 + crates/fetchkit/src/client.rs | 5 +- crates/fetchkit/src/error.rs | 4 + crates/fetchkit/src/fetchers/default.rs | 139 +++- crates/fetchkit/src/tool.rs | 54 +- crates/fetchkit/src/types.rs | 28 + crates/fetchkit/tests/integration.rs | 118 +++ specs/fetchers.md | 33 + specs/maintenance.md | 3 +- specs/threat-model.md | 1 + 16 files changed, 1345 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32527c1..6f86aed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Optional `render-rakers` feature for explicit, per-request lightweight JS/DOM rendering before markdown/text conversion. The backend is disabled by default, exposed only when a host enables it, and denies rakers-initiated subresource network requests. + ## [0.4.1] - 2026-07-04 ### Highlights diff --git a/Cargo.lock b/Cargo.lock index 7261f73..b2de300 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,15 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "1.0.0" @@ -116,9 +125,9 @@ version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -127,6 +136,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + [[package]] name = "aws-lc-rs" version = "1.17.0" @@ -212,6 +227,12 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.11.1" @@ -253,6 +274,19 @@ dependencies = [ "rand_core 0.10.1", ] +[[package]] +name = "chrono" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clap" version = "4.6.1" @@ -282,9 +316,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -348,6 +382,15 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -420,6 +463,29 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "cssparser" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9cdaae01d5ed7882b04d795e7f752f46ff52d2fa3b50a20d28c464510bba98" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.13.1", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a2a99df6e410a8ff4245aa2006499ea662245f967cc7c0a38c83ef8eb44dbf" +dependencies = [ + "quote 1.0.45", + "syn 2.0.117", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -442,9 +508,9 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -475,6 +541,27 @@ dependencies = [ "zeroize", ] +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "rustc_version", + "syn 2.0.117", +] + [[package]] name = "digest" version = "0.10.7" @@ -502,9 +589,47 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "dlopen" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e80ad39f814a9abe68583cd50a2d45c8a67561c3361ab8da240587dda80937" +dependencies = [ + "dlopen_derive", + "lazy_static", + "libc", + "winapi", +] + +[[package]] +name = "dlopen_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f236d9e1b1fbd81cea0f9cbdc8dcc7e8ebcd80e6659cd7cb2ad5f6c05946c581" +dependencies = [ + "libc", + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", ] [[package]] @@ -544,6 +669,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ego-tree" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b04dc5a38e4f151a79d9f2451ae6037fb6eaf5cba34771f44781f80e508498e3" + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -585,7 +722,8 @@ dependencies = [ "ed25519-dalek", "futures", "libc", - "rand", + "rakers", + "rand 0.10.1", "reqwest", "schemars", "serde", @@ -673,6 +811,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.32" @@ -727,9 +875,9 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -771,6 +919,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -858,6 +1015,30 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever 0.12.1", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "html5ever" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a1761807faccc9a19e86944bbf40610014066306f96edcdedc2fb714bcb7b8" +dependencies = [ + "log", + "markup5ever 0.39.0", +] + [[package]] name = "http" version = "1.4.0" @@ -974,6 +1155,30 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.2.0" @@ -1062,6 +1267,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -1136,11 +1347,11 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" dependencies = [ - "proc-macro2", - "quote", + "proc-macro2 1.0.106", + "quote 1.0.45", "rustc_version", "simd_cesu8", - "syn", + "syn 2.0.117", ] [[package]] @@ -1158,8 +1369,8 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" dependencies = [ - "quote", - "syn", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -1214,6 +1425,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -1226,6 +1446,49 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache 0.8.9", + "string_cache_codegen 0.5.4", + "tendril 0.4.3", +] + +[[package]] +name = "markup5ever" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7122d987ec5f704ee56f6e5b41a7d93722e9aae27ae07cafa4036c4d3f9757de" +dependencies = [ + "log", + "tendril 0.5.0", + "web_atoms", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" +dependencies = [ + "html5ever 0.27.0", + "markup5ever 0.12.1", + "tendril 0.4.3", + "xml5ever", +] + [[package]] name = "matchers" version = "0.2.0" @@ -1268,6 +1531,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1277,6 +1546,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.17.0" @@ -1305,12 +1583,126 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + [[package]] name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared 0.11.3", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared 0.13.1", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator 0.13.1", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand 0.8.6", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator 0.13.1", + "phf_shared 0.13.1", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -1342,14 +1734,39 @@ dependencies = [ "zerovec", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "prettyplease" version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ - "proc-macro2", - "syn", + "proc-macro2 1.0.106", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid 0.1.0", ] [[package]] @@ -1400,10 +1817,10 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ac53762fd065daa3194dd09337a38bd793a188100fd1a9304c4ab312d901771" dependencies = [ - "proc-macro2", + "proc-macro2 1.0.106", "pyo3-macros-backend", - "quote", - "syn", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -1413,9 +1830,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ca3a1557399783172dc5bf39cfca835157732532cba56b71d2292161e53b362" dependencies = [ "heck", - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -1448,7 +1865,7 @@ dependencies = [ "bytes", "getrandom 0.4.2", "lru-slab", - "rand", + "rand 0.10.1", "rand_pcg", "ring", "rustc-hash", @@ -1475,13 +1892,22 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + [[package]] name = "quote" version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ - "proc-macro2", + "proc-macro2 1.0.106", ] [[package]] @@ -1496,6 +1922,32 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rakers" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca21a92c48a8a31c33aece6ce624b51fc399245da9feb1d239f8a98e8d60714" +dependencies = [ + "anyhow", + "clap", + "html5ever 0.27.0", + "markup5ever_rcdom", + "rquickjs", + "scraper", + "similar", + "ureq", + "url", +] + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.10.1" @@ -1531,6 +1983,15 @@ dependencies = [ "rand_core 0.10.1", ] +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + [[package]] name = "ref-cast" version = "1.0.25" @@ -1546,9 +2007,9 @@ version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -1580,6 +2041,12 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "reqwest" version = "0.13.3" @@ -1637,6 +2104,61 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rquickjs" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16661bff09e9ed8e01094a188b463de45ec0693ade55b92ed54027d7ba7c40c" +dependencies = [ + "either", + "indexmap", + "rquickjs-core", + "rquickjs-macro", +] + +[[package]] +name = "rquickjs-core" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8db6379e204ef84c0811e90e7cc3e3e4d7688701db68a00d14a6db6849087b" +dependencies = [ + "chrono", + "dlopen", + "either", + "indexmap", + "phf 0.11.3", + "relative-path", + "rquickjs-sys", +] + +[[package]] +name = "rquickjs-macro" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6041104330c019fcd936026ae05e2446f5e8a2abef329d924f25424b7052a2f3" +dependencies = [ + "convert_case", + "fnv", + "ident_case", + "indexmap", + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro-crate", + "proc-macro2 1.0.106", + "quote 1.0.45", + "rquickjs-core", + "syn 2.0.117", +] + +[[package]] +name = "rquickjs-sys" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bc352c6b663604c3c186c000cfcc6c271f4b50bc135a285dd6d4f2a42f9790a" +dependencies = [ + "cc", +] + [[package]] name = "rustc-hash" version = "2.1.2" @@ -1672,7 +2194,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "aws-lc-rs", + "log", "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -1783,10 +2307,31 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" dependencies = [ - "proc-macro2", - "quote", + "proc-macro2 1.0.106", + "quote 1.0.45", "serde_derive_internals", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scraper" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdd0be4d296f048bfb06dd01bbc80ef789ddd2e55583e8d2e6b804942abfabc2" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever 0.39.0", + "precomputed-hash", + "selectors", + "tendril 0.5.0", ] [[package]] @@ -1812,6 +2357,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8adfa1c298912827b8a28b223b3b874357397ae706e6190acd9bf28cee99114d" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "log", + "new_debug_unreachable", + "phf 0.13.1", + "phf_codegen 0.13.1", + "precomputed-hash", + "rustc-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.28" @@ -1843,9 +2407,9 @@ version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -1854,9 +2418,9 @@ version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -1872,6 +2436,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "servo_arc" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1940,6 +2513,18 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "siphasher" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" + [[package]] name = "slab" version = "0.4.12" @@ -1962,6 +2547,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + [[package]] name = "spki" version = "0.7.3" @@ -1978,6 +2574,55 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.11.3", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.13.1", + "precomputed-hash", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2 1.0.106", + "quote 1.0.45", +] + +[[package]] +name = "string_cache_codegen" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69" +dependencies = [ + "phf_generator 0.13.1", + "phf_shared 0.13.1", + "proc-macro2 1.0.106", + "quote 1.0.45", +] + [[package]] name = "strsim" version = "0.11.1" @@ -1990,14 +2635,25 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid 0.1.0", +] + [[package]] name = "syn" version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ - "proc-macro2", - "quote", + "proc-macro2 1.0.106", + "quote 1.0.45", "unicode-ident", ] @@ -2016,9 +2672,9 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -2061,6 +2717,27 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "tendril" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4790fc369d5a530f4b544b094e31388b9b3a37c0f4652ade4505945f5660d24" +dependencies = [ + "new_debug_unreachable", + "utf-8", +] + [[package]] name = "thiserror" version = "2.0.18" @@ -2076,9 +2753,9 @@ version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -2136,9 +2813,9 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -2186,6 +2863,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "tower" version = "0.5.3" @@ -2253,9 +2947,9 @@ version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] @@ -2315,6 +3009,24 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-segmentation" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -2327,6 +3039,23 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "socks", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.8" @@ -2339,6 +3068,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -2435,7 +3170,7 @@ version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" dependencies = [ - "quote", + "quote 1.0.45", "wasm-bindgen-macro-support", ] @@ -2446,9 +3181,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" dependencies = [ "bumpalo", - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -2528,6 +3263,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web_atoms" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "075474b12bcb3d2e3d4546580e9de478eeeead668a1761e2a8860c836b7ef297" +dependencies = [ + "phf 0.13.1", + "phf_codegen 0.13.1", + "string_cache 0.9.0", + "string_cache_codegen 0.6.1", +] + [[package]] name = "webpki-root-certs" version = "1.0.7" @@ -2537,6 +3284,40 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.8", +] + +[[package]] +name = "webpki-roots" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf85cb06032201fa7c6f829d7db5a7e5aa45bcc0655327713065f6f0576731bf" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -2546,6 +3327,47 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -2737,6 +3559,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "wiremock" version = "0.6.5" @@ -2796,7 +3627,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -2810,9 +3641,9 @@ checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" dependencies = [ "anyhow", "prettyplease", - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -2850,7 +3681,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "unicode-xid", + "unicode-xid 0.2.6", "wasmparser", ] @@ -2860,6 +3691,17 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" +[[package]] +name = "xml5ever" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69" +dependencies = [ + "log", + "mac", + "markup5ever 0.12.1", +] + [[package]] name = "yoke" version = "0.8.2" @@ -2877,9 +3719,9 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", "synstructure", ] @@ -2898,9 +3740,9 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", "synstructure", ] @@ -2938,9 +3780,9 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.106", + "quote 1.0.45", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a431033..0dc72e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,9 @@ tower = { version = "0.5", features = ["util"] } # Platform filesystem flags libc = "0.2" +# Optional rendered fetching +rakers = { version = "0.1.7", default-features = false, features = ["rquickjs"] } + # Crypto (bot-auth feature) ed25519-dalek = { version = "2", features = ["rand_core"] } base64 = "0.22" diff --git a/README.md b/README.md index e486d5f..a4828c5 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,9 @@ fetchkit fetch https://example.com --hardened # Discover a small same-origin page map for an agent fetchkit fetch https://example.com --content-focus agent --crawl --max-pages 5 +# Optional JS/DOM rendering for simple SPAs/docs (requires render-rakers feature) +fetchkit fetch https://example.com/app --render-rakers + # Show full documentation fetchkit --llmtxt ``` @@ -131,6 +134,19 @@ Add to `Cargo.toml`: fetchkit = "0.2" ``` +Optional rendered fetching: + +```toml +[dependencies] +fetchkit = { version = "0.2", features = ["render-rakers"] } +``` + +`render-rakers` is not enabled by default. It is lightweight partial rendering: +inline JavaScript can update the DOM before markdown/text conversion, but it is +not a full browser engine. FetchKit blocks rakers-initiated subresource network +access in this mode; the initial page still uses FetchKit's normal URL, DNS, +proxy, timeout, and size policies. + ### Basic Fetch ```rust @@ -224,6 +240,7 @@ response = tool.fetch("https://example.com") | `max_pages` | int? | Maximum crawl pages, including the seed; default 5, max 20 | | `if_none_match` | string? | ETag for conditional `If-None-Match` | | `if_modified_since` | string? | Timestamp for conditional `If-Modified-Since` | +| `render` | string? | `"rakers"` to opt into rendered fetch when enabled | ## Response Fields @@ -249,6 +266,7 @@ response = tool.fetch("https://example.com") | `word_count` | int? | Word count of returned content | | `redirect_chain` | string[] | URLs visited during redirects (empty if none) | | `is_paywall` | bool? | Heuristic paywall signal (soft, not guaranteed) | +| `rendered_by` | string? | Rendering backend used before conversion, e.g. `"rakers"` | ## Error Handling diff --git a/crates/fetchkit-cli/Cargo.toml b/crates/fetchkit-cli/Cargo.toml index 38e6a4e..b7c5c06 100644 --- a/crates/fetchkit-cli/Cargo.toml +++ b/crates/fetchkit-cli/Cargo.toml @@ -19,6 +19,7 @@ doc = false [features] default = [] bot-auth = ["fetchkit/bot-auth"] +render-rakers = ["fetchkit/render-rakers"] [dependencies] fetchkit = { path = "../fetchkit", version = "0.4.1" } diff --git a/crates/fetchkit-cli/src/main.rs b/crates/fetchkit-cli/src/main.rs index 32be1e1..0724ace 100644 --- a/crates/fetchkit-cli/src/main.rs +++ b/crates/fetchkit-cli/src/main.rs @@ -100,6 +100,10 @@ enum Commands { /// Maximum crawl pages, including the seed #[arg(long, default_value_t = 5)] max_pages: usize, + + /// Render HTML with the rakers backend before conversion + #[arg(long)] + render_rakers: bool, }, } @@ -126,6 +130,7 @@ async fn main() { allow_env_proxy, bot_auth_key, bot_auth_agent, + false, )) .await; } @@ -140,6 +145,7 @@ async fn main() { content_focus, crawl, max_pages, + render_rakers, }) => { let options = FetchCommandOptions { output, @@ -151,6 +157,7 @@ async fn main() { content_focus, crawl, max_pages, + render_rakers, }; run_fetch(&url, options).await; } @@ -169,6 +176,7 @@ fn build_tool( allow_env_proxy: bool, bot_auth_key: Option, bot_auth_agent: Option, + render_rakers: bool, ) -> Tool { let mut builder = Tool::builder().enable_markdown(true); @@ -206,6 +214,17 @@ fn build_tool( let _ = bot_auth_agent; // suppress unused warning without feature + #[cfg(feature = "render-rakers")] + if render_rakers { + builder = builder.enable_render_rakers(true); + } + + #[cfg(not(feature = "render-rakers"))] + if render_rakers { + eprintln!("Error: --render-rakers requires the render-rakers feature (rebuild with --features render-rakers)"); + std::process::exit(1); + } + builder.build() } @@ -219,6 +238,7 @@ struct FetchCommandOptions { content_focus: Option, crawl: bool, max_pages: usize, + render_rakers: bool, } async fn run_fetch(url: &str, options: FetchCommandOptions) { @@ -230,12 +250,16 @@ async fn run_fetch(url: &str, options: FetchCommandOptions) { if options.crawl { request = request.crawl(true).max_pages(options.max_pages); } + if options.render_rakers { + request = request.render_rakers(); + } let tool = build_tool( options.user_agent, options.hardened, options.allow_env_proxy, options.bot_auth_key, options.bot_auth_agent, + options.render_rakers, ); // Execute request diff --git a/crates/fetchkit/Cargo.toml b/crates/fetchkit/Cargo.toml index 9105716..a68d1fb 100644 --- a/crates/fetchkit/Cargo.toml +++ b/crates/fetchkit/Cargo.toml @@ -14,6 +14,7 @@ readme = "../../README.md" default = [] bot-auth = ["dep:ed25519-dalek", "dep:base64", "dep:sha2", "dep:rand"] live-tests = [] +render-rakers = ["dep:rakers"] [dependencies] tokio = { workspace = true } @@ -29,6 +30,7 @@ bytes = { workspace = true } async-trait = { workspace = true } tower = { workspace = true } libc = { workspace = true } +rakers = { workspace = true, optional = true } # Optional: bot-auth feature ed25519-dalek = { workspace = true, optional = true } diff --git a/crates/fetchkit/src/client.rs b/crates/fetchkit/src/client.rs index d568483..c54cd4d 100644 --- a/crates/fetchkit/src/client.rs +++ b/crates/fetchkit/src/client.rs @@ -41,6 +41,8 @@ pub struct FetchOptions { pub blocked_hosts: Vec, /// Restrict redirects to the original host only. pub same_host_redirects_only: bool, + /// Enable rakers-rendered HTML fetching. The request must still opt in. + pub enable_render_rakers: bool, /// Web Bot Authentication config (draft-meunier-web-bot-auth-architecture). /// When set, outgoing requests are signed with Ed25519. #[cfg(feature = "bot-auth")] @@ -69,7 +71,8 @@ impl std::fmt::Debug for FetchOptions { .field("respect_proxy_env", &self.respect_proxy_env) .field("allowed_ports", &self.allowed_ports) .field("blocked_hosts", &self.blocked_hosts) - .field("same_host_redirects_only", &self.same_host_redirects_only); + .field("same_host_redirects_only", &self.same_host_redirects_only) + .field("enable_render_rakers", &self.enable_render_rakers); #[cfg(feature = "bot-auth")] d.field("bot_auth", &self.bot_auth); d.field( diff --git a/crates/fetchkit/src/error.rs b/crates/fetchkit/src/error.rs index 7877476..a209c97 100644 --- a/crates/fetchkit/src/error.rs +++ b/crates/fetchkit/src/error.rs @@ -57,6 +57,10 @@ pub enum FetchError { /// No FileSaver provided but save_to_file was requested #[error("File saving not available")] SaverNotAvailable, + + /// Rendered fetch requested but not enabled or not compiled in + #[error("Rendered fetch backend not available")] + RenderNotAvailable, } impl FetchError { diff --git a/crates/fetchkit/src/fetchers/default.rs b/crates/fetchkit/src/fetchers/default.rs index bf6c409..70095f0 100644 --- a/crates/fetchkit/src/fetchers/default.rs +++ b/crates/fetchkit/src/fetchers/default.rs @@ -87,6 +87,9 @@ const FIRST_BYTE_TIMEOUT: Duration = Duration::from_secs(1); // THREAT[TM-DOS-002]: Body timeout caps total request duration pub(crate) const BODY_TIMEOUT: Duration = Duration::from_secs(30); +#[cfg(feature = "render-rakers")] +const RAKERS_SCRIPT_TIMEOUT: Duration = Duration::from_secs(5); + /// Truncation message appended when body is cut short (timeout or size limit) pub(crate) const TRUNCATION_MESSAGE: &str = "\n\n[..content truncated...]"; @@ -236,6 +239,7 @@ impl Fetcher for DefaultFetcher { let method = request.effective_method(); let wants_markdown = options.enable_markdown && request.wants_markdown(); let wants_text = options.enable_text && request.wants_text(); + validate_rakers_render_request(&request, options)?; let max_body_size = options.max_body_size.unwrap_or(DEFAULT_MAX_BODY_SIZE); let accept = if wants_markdown { @@ -325,14 +329,18 @@ impl Fetcher for DefaultFetcher { let size = body.len() as u64; // Convert to string - let content = String::from_utf8_lossy(&body).to_string(); - - // Detect paywall before content is moved by conversion - let is_paywall = detect_paywall(&content); + let mut content = String::from_utf8_lossy(&body).to_string(); // Determine format and convert if needed // THREAT[TM-DOS-006]: Conversion input is bounded by max_body_size let is_html_content = is_html(&meta.content_type, &content); + let rendered_by = if is_html_content && request.wants_rakers_render() { + content = render_html_with_rakers(content, final_url.clone(), options).await?; + Some("rakers".to_string()) + } else { + None + }; + let is_paywall = detect_paywall(&content); let wants_main = request.wants_main_content(); let wants_readable = request.wants_readable_content(); let wants_agent = request.wants_agent_content(); @@ -430,6 +438,7 @@ impl Fetcher for DefaultFetcher { word_count: Some(word_count), redirect_chain, is_paywall: if is_paywall { Some(true) } else { None }, + rendered_by, ..Default::default() }) } @@ -669,6 +678,128 @@ fn redirect_target( Ok(Some(next_url)) } +fn validate_rakers_render_request( + request: &FetchRequest, + options: &FetchOptions, +) -> Result<(), FetchError> { + if !request.wants_rakers_render() { + return Ok(()); + } + + if !options.enable_render_rakers { + return Err(FetchError::RenderNotAvailable); + } + + #[cfg(feature = "render-rakers")] + { + Ok(()) + } + + #[cfg(not(feature = "render-rakers"))] + { + Err(FetchError::RenderNotAvailable) + } +} + +#[cfg(feature = "render-rakers")] +async fn render_html_with_rakers( + html: String, + page_url: String, + options: &FetchOptions, +) -> Result { + let user_agent = options.user_agent.clone(); + tokio::task::spawn_blocking(move || { + let deny_proxy = DenyProxy::new().map_err(|_| { + FetchError::RequestError( + "failed to initialize rendered-fetch network guard".to_string(), + ) + })?; + let cfg = rakers::HttpConfig { + user_agent, + headers: Vec::new(), + proxy: Some(deny_proxy.url()), + forward_headers: false, + }; + + rakers::render( + &html, + false, + Some(&page_url), + &cfg, + true, + Some(0), + Some(RAKERS_SCRIPT_TIMEOUT), + ) + .map_err(|err| FetchError::FetcherError(format!("rakers render failed: {err}"))) + }) + .await + .map_err(|_| FetchError::FetcherError("rakers render task failed".to_string()))? +} + +#[cfg(not(feature = "render-rakers"))] +async fn render_html_with_rakers( + _html: String, + _page_url: String, + _options: &FetchOptions, +) -> Result { + Err(FetchError::RenderNotAvailable) +} + +#[cfg(feature = "render-rakers")] +struct DenyProxy { + addr: std::net::SocketAddr, + stop: std::sync::Arc, + handle: Option>, +} + +#[cfg(feature = "render-rakers")] +impl DenyProxy { + fn new() -> std::io::Result { + let listener = std::net::TcpListener::bind("127.0.0.1:0")?; + listener.set_nonblocking(true)?; + let addr = listener.local_addr()?; + let stop = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); + let thread_stop = std::sync::Arc::clone(&stop); + let handle = std::thread::spawn(move || { + while !thread_stop.load(std::sync::atomic::Ordering::Relaxed) { + match listener.accept() { + Ok((mut stream, _)) => { + let _ = std::io::Write::write_all( + &mut stream, + b"HTTP/1.1 403 Forbidden\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", + ); + } + Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => { + std::thread::sleep(Duration::from_millis(5)); + } + Err(_) => break, + } + } + }); + + Ok(Self { + addr, + stop, + handle: Some(handle), + }) + } + + fn url(&self) -> String { + format!("http://{}", self.addr) + } +} + +#[cfg(feature = "render-rakers")] +impl Drop for DenyProxy { + fn drop(&mut self) { + self.stop.store(true, std::sync::atomic::Ordering::Relaxed); + let _ = std::net::TcpStream::connect(self.addr); + if let Some(handle) = self.handle.take() { + let _ = handle.join(); + } + } +} + /// Check if content type indicates binary content fn is_binary_content_type(content_type: &str) -> bool { let ct_lower = content_type.to_lowercase(); diff --git a/crates/fetchkit/src/tool.rs b/crates/fetchkit/src/tool.rs index dfe39e4..c21de61 100644 --- a/crates/fetchkit/src/tool.rs +++ b/crates/fetchkit/src/tool.rs @@ -144,6 +144,8 @@ pub struct ToolBuilder { blocked_hosts: Vec, /// Restrict redirects to the original host only. same_host_redirects_only: bool, + /// Enable rakers-rendered HTML fetching. Requests still opt in with render=rakers. + enable_render_rakers: bool, /// Web Bot Authentication config. #[cfg(feature = "bot-auth")] bot_auth: Option, @@ -168,7 +170,8 @@ impl std::fmt::Debug for ToolBuilder { .field("respect_proxy_env", &self.respect_proxy_env) .field("allowed_ports", &self.allowed_ports) .field("blocked_hosts", &self.blocked_hosts) - .field("same_host_redirects_only", &self.same_host_redirects_only); + .field("same_host_redirects_only", &self.same_host_redirects_only) + .field("enable_render_rakers", &self.enable_render_rakers); #[cfg(feature = "bot-auth")] d.field("bot_auth", &self.bot_auth); d.field( @@ -247,6 +250,16 @@ impl ToolBuilder { self } + /// Enable the rakers rendered-fetch backend. + /// + /// This method is only available when built with the `render-rakers` feature. + /// Requests must still set `render: "rakers"` to use it. + #[cfg(feature = "render-rakers")] + pub fn enable_render_rakers(mut self, enable: bool) -> Self { + self.enable_render_rakers = enable; + self + } + /// Allow outbound requests to a specific port. /// /// If no ports are configured, any URL port is allowed. @@ -383,6 +396,7 @@ impl ToolBuilder { allowed_ports: self.allowed_ports.clone(), blocked_hosts: self.blocked_hosts.clone(), same_host_redirects_only: self.same_host_redirects_only, + enable_render_rakers: self.enable_render_rakers, #[cfg(feature = "bot-auth")] bot_auth: self.bot_auth.clone(), transport: self.transport.clone(), @@ -418,6 +432,7 @@ impl ToolBuilder { self.enable_markdown, self.enable_text, self.enable_save_to_file, + self.enable_render_rakers, ) } @@ -449,6 +464,7 @@ pub struct Tool { allowed_ports: Vec, blocked_hosts: Vec, same_host_redirects_only: bool, + enable_render_rakers: bool, #[cfg(feature = "bot-auth")] bot_auth: Option, transport: Option>, @@ -474,7 +490,8 @@ impl std::fmt::Debug for Tool { .field("respect_proxy_env", &self.respect_proxy_env) .field("allowed_ports", &self.allowed_ports) .field("blocked_hosts", &self.blocked_hosts) - .field("same_host_redirects_only", &self.same_host_redirects_only); + .field("same_host_redirects_only", &self.same_host_redirects_only) + .field("enable_render_rakers", &self.enable_render_rakers); #[cfg(feature = "bot-auth")] d.field("bot_auth", &self.bot_auth); d.field( @@ -551,6 +568,7 @@ impl Tool { self.enable_markdown, self.enable_text, self.enable_save_to_file, + self.enable_render_rakers, ) } @@ -652,6 +670,7 @@ impl Tool { allowed_ports: self.allowed_ports.clone(), blocked_hosts: self.blocked_hosts.clone(), same_host_redirects_only: self.same_host_redirects_only, + enable_render_rakers: self.enable_render_rakers, #[cfg(feature = "bot-auth")] bot_auth: self.bot_auth.clone(), // None => default ReqwestTransport; hosts inject a custom transport via @@ -755,6 +774,7 @@ fn validate_args(tool: &Tool, args: &Value) -> Result<(), ToolError> { "as_text" => tool.enable_text, "save_to_file" => tool.enable_save_to_file, "content_focus" | "if_none_match" | "if_modified_since" | "crawl" | "max_pages" => true, + "render" => tool.enable_render_rakers, _ => false, }; @@ -792,6 +812,7 @@ fn build_input_schema( enable_markdown: bool, enable_text: bool, enable_save_to_file: bool, + enable_render_rakers: bool, ) -> Value { let mut properties = Map::new(); properties.insert( @@ -830,6 +851,17 @@ fn build_input_schema( ); } + if enable_render_rakers { + properties.insert( + "render".to_string(), + json!({ + "type": "string", + "enum": ["rakers"], + "description": "Optional rendered fetch backend. rakers is lightweight partial JS/DOM rendering, not a full browser." + }), + ); + } + properties.insert( "content_focus".to_string(), json!({ @@ -902,7 +934,8 @@ fn build_output_schema() -> Value { "quality": {"type": "object"}, "crawl": {"type": "object"}, "redirect_chain": {"type": "array", "items": {"type": "string"}}, - "is_paywall": {"type": "boolean"} + "is_paywall": {"type": "boolean"}, + "rendered_by": {"type": "string", "enum": ["rakers"]} }, "required": ["url", "status_code"], "additionalProperties": false @@ -1056,6 +1089,16 @@ fn build_help(tool: &Tool) -> String { parameter_description(tool.locale(), "max_pages"), )); + if tool.enable_render_rakers { + rows.push(table_row( + "render", + "string", + "no", + "—", + parameter_description(tool.locale(), "render"), + )); + } + let adapters = if tool.enable_save_to_file { if is_ukrainian(tool.locale()) { "- `FileSaver` (необов’язковий): потрібен, коли задано `save_to_file`.\n" @@ -1145,6 +1188,7 @@ fn parameter_description(locale: &str, field: &str) -> &'static str { (true, "content_focus") => "`full`, `main`, `readable`, або `agent`", (true, "crawl") => "Обмежене same-origin виявлення сторінок для агентів", (true, "max_pages") => "Максимум сторінок для crawl, включно з початковою", + (true, "render") => "Необов’язковий backend рендерингу: `rakers`", (false, "url") => "HTTP/HTTPS URL, or a bare domain URL normalized to `https://`", (false, "method") => "`GET` or `HEAD`", (false, "as_markdown") => "Convert HTML to markdown", @@ -1153,6 +1197,7 @@ fn parameter_description(locale: &str, field: &str) -> &'static str { (false, "content_focus") => "`full`, `main`, `readable`, or `agent`", (false, "crawl") => "Bounded same-origin page discovery for agents", (false, "max_pages") => "Maximum crawl pages, including the seed", + (false, "render") => "Optional rendered-fetch backend: `rakers`", _ => "", } } @@ -1174,6 +1219,7 @@ fn map_fetch_error(locale: &str, err: FetchError) -> ToolError { FetchError::FetcherError(_) => user_error(locale, user_text(locale, "fetcher_error")), FetchError::SaveError(_) => user_error(locale, user_text(locale, "save_error")), FetchError::SaverNotAvailable => user_error(locale, user_text(locale, "saver_missing")), + FetchError::RenderNotAvailable => user_error(locale, user_text(locale, "render_missing")), } } @@ -1209,6 +1255,7 @@ fn user_text(locale: &str, key: &str) -> &'static str { (true, "fetcher_error") => "Не вдалося обробити відповідь цього URL.", (true, "save_error") => "Не вдалося зберегти файл. Перевірте шлях призначення.", (true, "saver_missing") => "save_to_file потребує адаптер FileSaver.", + (true, "render_missing") => "render потребує увімкнений backend рендерингу.", (false, "missing_url") => "url is required.", (false, "invalid_scheme") => "URL must be http://, https://, or a bare domain URL.", (false, "invalid_method") => "Method must be GET or HEAD.", @@ -1221,6 +1268,7 @@ fn user_text(locale: &str, key: &str) -> &'static str { (false, "fetcher_error") => "Could not process the response for this URL.", (false, "save_error") => "Could not save the file. Check the destination path.", (false, "saver_missing") => "save_to_file requires the FileSaver adapter.", + (false, "render_missing") => "render requires an enabled rendered-fetch backend.", _ => "Tool execution failed.", } } diff --git a/crates/fetchkit/src/types.rs b/crates/fetchkit/src/types.rs index 78ed257..0ab7c10 100644 --- a/crates/fetchkit/src/types.rs +++ b/crates/fetchkit/src/types.rs @@ -31,6 +31,14 @@ pub enum HttpMethod { Head, } +/// Optional browser-rendering backend. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "kebab-case")] +pub enum RenderMode { + /// Lightweight rakers-based JavaScript/DOM rendering. + Rakers, +} + impl FromStr for HttpMethod { type Err = String; @@ -120,6 +128,11 @@ pub struct FetchRequest { /// Maximum pages to fetch when crawl discovery is enabled, including the seed. #[serde(default, skip_serializing_if = "Option::is_none")] pub max_pages: Option, + + /// Optional browser rendering backend. Disabled unless the tool/options + /// explicitly enable the requested backend. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub render: Option, } impl FetchRequest { @@ -198,6 +211,12 @@ impl FetchRequest { self } + /// Render HTML through the rakers backend before conversion. + pub fn render_rakers(mut self) -> Self { + self.render = Some(RenderMode::Rakers); + self + } + /// Get the effective method (default to GET) pub fn effective_method(&self) -> HttpMethod { self.method.unwrap_or_default() @@ -241,6 +260,11 @@ impl FetchRequest { pub fn wants_crawl(&self) -> bool { self.crawl.unwrap_or(false) } + + /// Check if rakers rendering is requested. + pub fn wants_rakers_render(&self) -> bool { + self.render == Some(RenderMode::Rakers) + } } fn canonical_fetch_url(raw_url: &str) -> Result { @@ -543,6 +567,10 @@ pub struct FetchResponse { /// Heuristic paywall detection (soft signal, not guaranteed) #[serde(skip_serializing_if = "Option::is_none")] pub is_paywall: Option, + + /// Rendering backend used before conversion, if any. + #[serde(skip_serializing_if = "Option::is_none")] + pub rendered_by: Option, } #[cfg(test)] diff --git a/crates/fetchkit/tests/integration.rs b/crates/fetchkit/tests/integration.rs index d8aadaa..950625f 100644 --- a/crates/fetchkit/tests/integration.rs +++ b/crates/fetchkit/tests/integration.rs @@ -164,6 +164,124 @@ async fn test_html_to_markdown() { assert!(content.contains("- Item 2")); } +#[tokio::test] +async fn test_render_rakers_request_requires_enabled_backend() { + let result = fetch_with_options( + FetchRequest::new("https://example.com") + .as_markdown() + .render_rakers(), + FetchOptions { + enable_markdown: true, + enable_text: true, + ..Default::default() + }, + ) + .await; + + assert!(matches!(result, Err(FetchError::RenderNotAvailable))); +} + +#[test] +fn test_tool_schema_hides_render_by_default() { + let schema = Tool::builder().build_input_schema(); + assert!(schema["properties"].get("render").is_none()); +} + +#[cfg(feature = "render-rakers")] +#[test] +fn test_tool_schema_exposes_render_when_rakers_enabled() { + let schema = Tool::builder() + .enable_render_rakers(true) + .build_input_schema(); + assert_eq!(schema["properties"]["render"]["enum"][0], "rakers"); +} + +#[cfg(feature = "render-rakers")] +#[tokio::test] +async fn test_render_rakers_executes_inline_js_before_markdown_conversion() { + let server = MockServer::start().await; + let html = r#" + + +
Loading
+ + +"#; + + Mock::given(method("GET")) + .and(path("/spa")) + .respond_with(ResponseTemplate::new(200).set_body_raw(html, "text/html")) + .mount(&server) + .await; + + let tool = Tool::builder() + .block_private_ips(false) + .enable_render_rakers(true) + .build(); + let response = tool + .execute( + FetchRequest::new(format!("{}/spa", server.uri())) + .as_markdown() + .render_rakers(), + ) + .await + .unwrap(); + + assert_eq!(response.rendered_by.as_deref(), Some("rakers")); + let content = response.content.unwrap(); + assert!(content.contains("# Rendered Inline")); + assert!(content.contains("Ready")); + assert!(!content.contains("Loading")); +} + +#[cfg(feature = "render-rakers")] +#[tokio::test] +async fn test_render_rakers_does_not_fetch_page_subresources() { + let server = MockServer::start().await; + let html = r#" + + +

Fallback

+ + +"#; + + Mock::given(method("GET")) + .and(path("/spa")) + .respond_with(ResponseTemplate::new(200).set_body_raw(html, "text/html")) + .mount(&server) + .await; + Mock::given(method("GET")) + .and(path("/api")) + .respond_with(ResponseTemplate::new(200).set_body_string("leak")) + .mount(&server) + .await; + + let tool = Tool::builder() + .block_private_ips(false) + .enable_render_rakers(true) + .build(); + let response = tool + .execute( + FetchRequest::new(format!("{}/spa", server.uri())) + .as_markdown() + .render_rakers(), + ) + .await + .unwrap(); + + assert_eq!(response.rendered_by.as_deref(), Some("rakers")); + assert!(response.content.unwrap().contains("# Fetched")); + let requests = server.received_requests().await.unwrap(); + assert!(!requests.iter().any(|req| req.url.path() == "/api")); +} + #[tokio::test] async fn test_html_to_text() { let mock_server = MockServer::start().await; diff --git a/specs/fetchers.md b/specs/fetchers.md index 80b97ed..e6046a6 100644 --- a/specs/fetchers.md +++ b/specs/fetchers.md @@ -172,6 +172,39 @@ Hosts that consume fetchkit through the `Tool` surface inject the transport with honors it, so the host keeps Tool's description/schema/llmtxt and FetchOptions assembly while owning egress. +### Browser-Rendered Fetching + +Browser-rendered fetching is optional and MUST NOT be enabled by default. +It is a fetcher/render-backend concern, not an `HttpTransport` concern: +rendering needs page lifecycle, JavaScript execution, subresource policy, +DOM snapshotting, and wait strategy, while `HttpTransport` remains a +single-hop socket adapter. + +The first lightweight rendered mode MUST be exposed explicitly behind a +Cargo feature named `render-rakers`. It may use the rakers-style approach: +parse HTML, execute JavaScript in a lightweight runtime with a partial DOM, +serialize the post-execution DOM, then pass that HTML through the existing +markdown/text conversion path. + +`render-rakers` requirements: +- Disabled unless the `render-rakers` Cargo feature is enabled. +- Not part of default features. +- Documented as partial browser rendering, not a full browser engine. +- Best-effort for SPAs and client-rendered docs; no guarantee for pages that + require real layout, WebGL, service workers, browser fingerprinting, or a + complete DOM/CSS engine. +- Must honor fetchkit URL validation, allow/block lists, DNS policy, proxy + policy, timeout policy, and body-size limits for the initial page. +- Must not let the rakers runtime bypass fetchkit egress policy. Until + subresource fetches can be routed through fetchkit policy, rakers-initiated + external script, fetch, and XHR requests must be denied. +- Must expose an explicit request/config switch; enabling the Cargo feature + only makes the backend available and does not change default fetch behavior. + +Future real-browser rendering MUST be a separate backend and feature flag, for +example `render-servo`. Servo support must not reuse the `render-rakers` feature +because it has different dependency, fidelity, security, and platform tradeoffs. + ### Configuration Fetchers receive `FetchOptions` for: diff --git a/specs/maintenance.md b/specs/maintenance.md index 95c553f..3e33167 100644 --- a/specs/maintenance.md +++ b/specs/maintenance.md @@ -24,7 +24,8 @@ code where the API has changed. `CHANGELOG.md`. 4. **Verify lockfile** - Ensure `Cargo.lock` reflects the updated versions 5. **Build & test** - `cargo build --workspace && cargo test --workspace` must pass - after updates, with and without optional features (e.g. `--features bot-auth`) + after updates, with and without optional features (e.g. `--features bot-auth`; + `--features render-rakers` once implemented) 6. **Audit advisories** - Run `cargo audit` (if available) to check for known vulnerabilities ### 2. Documentation Quality (docs.rs / rustdoc) diff --git a/specs/threat-model.md b/specs/threat-model.md index fb9b8e4..ce12cf2 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -438,6 +438,7 @@ None — all previously open threats have been mitigated. | Path traversal prevention | TM-INPUT | Lexical normalization plus save-time parent-directory symlink rejection in `LocalFileSaver` | | Save feature gating | TM-INPUT | `enable_save_to_file` disabled by default; schema gated | | Bot-auth feature gating | TM-AUTH | `bot-auth` Cargo feature disabled by default; no crypto deps unless opted in | +| Rendered fetch feature gating | TM-SSRF, TM-NET, TM-DOS | Browser-rendered fetching disabled by default; lightweight rakers-style rendering must be gated by `render-rakers`, require an explicit request/config switch, apply fetchkit URL, DNS, proxy, timeout, and body-size policy to the initial page, and deny rakers-initiated subresource network requests unless they can be routed through fetchkit policy | | Signature nonce + timestamps | TM-AUTH | 32-byte random nonce + created/expires per signature prevents replay | | Authority-scoped signatures | TM-AUTH | Signature covers `@authority`; per-host binding | | Graceful signing failure | TM-AUTH | Signing errors logged, request proceeds unsigned |