Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions crates/tui/src/tools/web_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,14 @@ fn normalize_url(href: &str) -> String {
}

fn normalize_bing_url(href: &str) -> String {
// Bing wraps every SERP result URL in a `/ck/a?...&u=<base64>` click-tracking
// redirect, and in the raw HTML the separators are `&amp;` entities. Without
// decoding entities first, `extract_query_param` looks for `u` but the actual
// key is `amp;u`, so the real URL is never recovered: every result collapses to
// a `bing.com` root domain, which the spam heuristic then rejects — yielding
// zero results for the default Bing backend. Decode entities before parsing.
let href = decode_html_entities(href);
let href = href.as_str();
Comment on lines +910 to +911
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Shadowing the `href` parameter first with an owned `String` and then immediately shadowing it again with a `&str` slice of itself can be confusing to read and maintain. It is cleaner and more idiomatic to use a distinct name for the intermediate owned `String` (e.g., `decoded_href`) and then bind `href` to its slice.

    let decoded_href = decode_html_entities(href);
    let href = decoded_href.as_str();

if let Some(encoded) = extract_query_param(href, "u") {
let decoded = percent_decode(&encoded);
let token = decoded.strip_prefix("a1").unwrap_or(&decoded);
Expand Down Expand Up @@ -1027,11 +1035,22 @@ fn extract_query_param(url: &str, key: &str) -> Option<String> {
mod tests {
use super::{
ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, decode_html_entities,
extract_search_query, is_likely_spam_results, optional_search_max_results, root_domain,
sanitize_error_body, truncate_error_body,
extract_search_query, is_likely_spam_results, normalize_bing_url,
optional_search_max_results, root_domain, sanitize_error_body, truncate_error_body,
};
use serde_json::json;

// Regression test for Bing click-tracking (`/ck/a`) hrefs whose query
// separators arrive as `&amp;` HTML entities. `normalize_bing_url` is expected
// to hand back the URL carried in the `u=` parameter (here an `a1`-prefixed
// base64 token) rather than the Bing redirect itself.
#[test]
fn bing_ckurl_with_html_entities_decodes_real_url() {
    let tracked_href = "https://www.bing.com/ck/a?!&amp;&amp;p=abc&amp;u=a1aHR0cHM6Ly9ydXN0LWxhbmcub3JnLw&amp;ntb=1";
    let expected = "https://rust-lang.org/";
    let recovered = normalize_bing_url(tracked_href);
    assert_eq!(recovered, expected);
}

fn entry(url: &str) -> WebSearchEntry {
WebSearchEntry {
title: "x".into(),
Expand Down
Loading