1515"""
1616
1717import concurrent .futures
18+ import copy
1819import json
1920import logging
2021import os
2122import re
2223import time
23- from dataclasses import dataclass , field
24+ from dataclasses import dataclass , field , replace
2425from hashlib import sha256
2526from typing import List , Dict , Any , Optional , Union
2627
@@ -99,6 +100,20 @@ def _log_stage_metrics(
99100 logger .debug ("stage_metrics " + " " .join (log_parts ))
100101
101102
103+ def _canonicalize_memory_result_paths (result : "HybridResult" , canonical_root_path : Optional [str ]) -> tuple [str , str ]:
104+ """Return a stable result filepath plus display path for rolled memory hits."""
105+ if not canonical_root_path :
106+ return result .filepath , result .display_path
107+
108+ if str (result .filepath or "" ).startswith ("recallforge://" ):
109+ return (
110+ f"recallforge://{ result .collection } /{ canonical_root_path } " ,
111+ f"{ result .collection } /{ canonical_root_path } " ,
112+ )
113+
114+ return canonical_root_path , canonical_root_path
115+
116+
102117# Intent-to-weight mappings for RRF fusion
103118# Each intent maps source names to weight multipliers
104119INTENT_WEIGHTS : Dict [str , Dict [str , float ]] = {
@@ -132,6 +147,8 @@ class SearchAudit:
132147 blend_weights : Dict [str , float ] = field (default_factory = dict ) # rrf_weight, rerank_weight
133148 media_compensation_applied : bool = False # Whether media boost was applied in RRF
134149 memory_rollup_boost : float = 1.0 # Multiplier applied when sibling assets are rolled up
150+ memory_primary_evidence_path : Optional [str ] = None
151+ memory_supporting_paths : List [str ] = field (default_factory = list )
135152 final_blended_score : float = 0.0
136153
137154
@@ -157,6 +174,8 @@ class HybridResult:
157174 memory_role : str = "root"
158175 memory_root_path : Optional [str ] = None
159176 memory_hit_count : int = 1
177+ memory_primary_evidence_path : Optional [str ] = None
178+ memory_supporting_paths : Optional [List [str ]] = None
160179 tags : Optional [List [str ]] = None
161180 audit : Optional [SearchAudit ] = None # Per-result audit trail
162181
@@ -1239,15 +1258,57 @@ def _merge_tags(items: List[HybridResult]) -> Optional[List[str]]:
12391258 rolled : List [HybridResult ] = []
12401259 for key in order :
12411260 group = sorted (grouped [key ], key = lambda item : item .score , reverse = True )
1242- representative = group [0 ]
1261+ top_hit = group [0 ]
1262+ root_candidate = next (
1263+ (item for item in group if item .memory_role == "root" ),
1264+ None ,
1265+ )
1266+ representative = replace (root_candidate or top_hit )
1267+ representative .score = top_hit .score
1268+ representative .rrf_rank = top_hit .rrf_rank
1269+ representative .rerank_score = top_hit .rerank_score
1270+ representative .source = top_hit .source
1271+ representative .audit = copy .deepcopy (top_hit .audit ) if top_hit .audit else None
1272+ representative .context = representative .context or top_hit .context
1273+ representative .body = representative .body or top_hit .body
1274+ representative .hash = representative .hash or top_hit .hash
1275+ representative .docid = representative .docid or top_hit .docid
1276+ representative .modified_at = representative .modified_at or top_hit .modified_at
1277+ representative .body_length = representative .body_length or top_hit .body_length
1278+
1279+ canonical_path = representative .memory_root_path or top_hit .memory_root_path
1280+ if canonical_path :
1281+ representative .filepath , representative .display_path = _canonicalize_memory_result_paths (
1282+ representative ,
1283+ canonical_path ,
1284+ )
1285+ if not root_candidate :
1286+ representative .title = os .path .basename (canonical_path )
1287+ representative .memory_root_path = canonical_path
1288+ else :
1289+ representative .memory_root_path = representative .filepath
1290+
1291+ representative .memory_role = "root"
12431292 representative .memory_hit_count = len (group )
12441293 representative .tags = _merge_tags (group )
1294+ representative .memory_primary_evidence_path = top_hit .filepath
1295+ representative .memory_supporting_paths = [
1296+ item .filepath
1297+ for item in group
1298+ if item .filepath not in {representative .filepath , top_hit .filepath }
1299+ ][:5 ]
12451300 memory_rollup_boost = 1.0
12461301 if len (group ) > 1 :
12471302 memory_rollup_boost += min (0.15 , 0.03 * (len (group ) - 1 ))
12481303 representative .score *= memory_rollup_boost
12491304 if representative .audit :
1305+ representative .audit .filepath = representative .filepath
1306+ representative .audit .content_type = representative .content_type
12501307 representative .audit .memory_rollup_boost = memory_rollup_boost
1308+ representative .audit .memory_primary_evidence_path = top_hit .filepath
1309+ representative .audit .memory_supporting_paths = list (
1310+ representative .memory_supporting_paths or []
1311+ )
12511312 representative .audit .final_blended_score = representative .score
12521313 rolled .append (representative )
12531314
@@ -1570,7 +1631,7 @@ def run_single_query(q: BatchQuery) -> List[tuple]:
15701631 return [(r , r .score ) for r in results ]
15711632
15721633 # Run all queries in parallel
1573- all_results : Dict [ int , List [tuple ]] = {}
1634+ all_results : List [ List [tuple ]] = [[] for _ in batch_queries ]
15741635 with concurrent .futures .ThreadPoolExecutor (max_workers = max_workers ) as executor :
15751636 future_to_idx = {
15761637 executor .submit (run_single_query , q ): i
@@ -1587,7 +1648,7 @@ def run_single_query(q: BatchQuery) -> List[tuple]:
15871648 # Merge results using RRF with best-score-wins
15881649 merged : Dict [str , Dict [str , Any ]] = {}
15891650
1590- for idx , results in all_results . items ( ):
1651+ for idx , results in enumerate ( all_results ):
15911652 weight = batch_queries [idx ].weight
15921653 for rank , (result , score ) in enumerate (results ):
15931654 filepath = result .filepath
0 commit comments