1515from typing import Dict , List , Tuple , Optional
1616from dataclasses import dataclass , field
1717from datetime import datetime
18- from collections import defaultdict
18+ from collections import defaultdict , OrderedDict
1919import hashlib
2020import re
2121from urllib .parse import urlparse
4242 SmartRuleLoader = None
4343 merge_with_main_config = None
4444
45- # 导入占位符模块
46- from .placeholder_modules import (
47- SemanticAnalyzer , UserProfiler , PerformanceMonitor
48- )
45+ # 导入核心分析组件
46+ from .semantic_analyzer import SemanticAnalyzer
47+ from .user_profiler import UserProfiler
48+
49+ try :
50+ from .performance_optimizer import PerformanceMonitor
51+ except ImportError :
52+ PerformanceMonitor = None # type: ignore[misc,assignment]
4953
5054
5155@dataclass
@@ -107,9 +111,9 @@ def __init__(self, config_path: str = "config.json", enable_ml: bool = True, con
107111 self ._ml_classifier : Optional [MLClassifierWrapper ] = None
108112 self ._llm_classifier : Optional [LLMClassifier ] = None
109113
110- # 缓存
114+ # 缓存(OrderedDict 实现 LRU 淘汰)
111115 self .feature_cache : Dict [str , BookmarkFeatures ] = {}
112- self .classification_cache : Dict [str , ClassificationResult ] = {}
116+ self .classification_cache : OrderedDict [str , ClassificationResult ] = OrderedDict ()
113117 self ._max_cache_size = 5000
114118
115119 # 统计
@@ -150,9 +154,12 @@ def user_profiler(self) -> UserProfiler:
150154 return self ._user_profiler
151155
@property
def performance_monitor(self) -> Optional['PerformanceMonitor']:
    """Return the lazily created performance monitor, or ``None``.

    The monitor is built on first access and stored on
    ``self._performance_monitor``. ``None`` is returned when the
    module-level ``PerformanceMonitor`` name is ``None`` (its import
    failed) or when constructing it raises; a construction failure is
    logged as a warning and is attempted again on the next access.
    """
    # Fast path: an instance has already been created.
    if self._performance_monitor is not None:
        return self._performance_monitor

    # The optional dependency is unavailable; nothing to construct.
    if PerformanceMonitor is None:
        return None

    try:
        self._performance_monitor = PerformanceMonitor()
    except Exception as exc:
        # Monitoring is best-effort: log and fall through to return None.
        self.logger.warning(f"性能监控器初始化失败: {exc}")
    return self._performance_monitor
157164
158165 @property
@@ -300,6 +307,7 @@ def classify(self, url: str, title: str) -> ClassificationResult:
300307 cache_key = hashlib .md5 (f"{ url } ::{ title } " .encode ()).hexdigest ()
301308 if cache_key in self .classification_cache :
302309 self .stats ['cache_hits' ] += 1
310+ self .classification_cache .move_to_end (cache_key ) # LRU 更新
303311 cached = self .classification_cache [cache_key ]
304312 cached .processing_time = (datetime .now () - start_time ).total_seconds ()
305313 return cached
@@ -310,43 +318,38 @@ def classify(self, url: str, title: str) -> ClassificationResult:
310318 # 多方法融合
311319 results : List [ClassificationResult ] = []
312320
321+ def _collect (raw ):
322+ """将 dict / ClassificationResult / None 统一追加到 results。"""
323+ if raw is None :
324+ return
325+ results .append (self ._to_classification_result (raw ))
326+
313327 # 1) 规则引擎
314- rule_result = self .rule_engine .classify (features )
315- if rule_result :
316- results .append (rule_result )
328+ _collect (self .rule_engine .classify (features ))
317329
318330 # 2) 机器学习
319331 if self .ml_classifier :
320- ml_result = self .ml_classifier .classify (features )
321- if ml_result :
322- results .append (ml_result )
332+ _collect (self .ml_classifier .classify (features ))
323333
324334 # 3) 语义分析
325335 if self .config .get ('ai_settings' , {}).get ('use_semantic_analysis' , True ):
326- semantic_result = self .semantic_analyzer .classify (features )
327- if semantic_result :
328- results .append (semantic_result )
336+ _collect (self .semantic_analyzer .classify (features ))
329337
330338 # 4) 用户画像
331339 if self .config .get ('ai_settings' , {}).get ('use_user_profiling' , True ):
332- user_result = self .user_profiler .classify (features )
333- if user_result :
334- results .append (user_result )
340+ _collect (self .user_profiler .classify (features ))
335341
336342 # 5) LLM(可选)
337343 if self .llm_classifier and self .llm_classifier .enabled ():
338344 try :
339- llm_result = self .llm_classifier .classify (
340- url ,
341- title ,
345+ _collect (self .llm_classifier .classify (
346+ url , title ,
342347 context = {
343348 'domain' : features .domain ,
344349 'content_type' : features .content_type ,
345350 'language' : features .language ,
346351 },
347- )
348- if llm_result :
349- results .append (llm_result )
352+ ))
350353 except Exception as e :
351354 self .logger .warning (f"LLM 分类调用失败: { e } " )
352355
@@ -376,11 +379,31 @@ def classify(self, url: str, title: str) -> ClassificationResult:
376379 self ._cache_result (cache_key , final_result )
377380 return final_result
378381
@staticmethod
def _to_classification_result(raw) -> ClassificationResult:
    """Normalize a classifier output into a ``ClassificationResult``.

    Args:
        raw: Either a ``ClassificationResult`` (returned unchanged) or a
            ``dict`` whose keys mirror the result fields (``category``,
            ``confidence``, ``subcategory``, ``reasoning``,
            ``alternatives``, ``processing_time``, ``method``, ``facets``).

    Returns:
        A ``ClassificationResult``. Keys that are missing from the dict
        *or explicitly set to* ``None`` fall back to the same defaults.

    Raises:
        TypeError: If ``raw`` is neither a ``ClassificationResult`` nor a
            ``dict``.
    """
    if isinstance(raw, ClassificationResult):
        return raw
    if not isinstance(raw, dict):
        raise TypeError(f"Unexpected classification result type: {type(raw)}")

    # ``dict.get(key, default)`` only applies the default when the key is
    # absent. Using ``or`` also covers an explicit ``None`` value, which
    # would otherwise crash ``float()`` or put ``None`` into list/dict fields.
    return ClassificationResult(
        category=raw.get('category') or '未分类',
        confidence=float(raw.get('confidence') or 0.0),
        subcategory=raw.get('subcategory'),
        reasoning=raw.get('reasoning') or [],
        alternatives=raw.get('alternatives') or [],
        processing_time=float(raw.get('processing_time') or 0.0),
        method=raw.get('method') or 'unknown',
        facets=raw.get('facets') or {},
    )
379400 def _cache_result (self , cache_key : str , result : ClassificationResult ):
380- if len (self .classification_cache ) >= self ._max_cache_size :
381- oldest_key = next (iter (self .classification_cache ))
382- del self .classification_cache [oldest_key ]
383- self .classification_cache [cache_key ] = result
401+ if cache_key in self .classification_cache :
402+ self .classification_cache .move_to_end (cache_key )
403+ else :
404+ if len (self .classification_cache ) >= self ._max_cache_size :
405+ self .classification_cache .popitem (last = False ) # 淘汰最久未使用
406+ self .classification_cache [cache_key ] = result
384407
385408 def _ensemble_classification (self , results : List [ClassificationResult ], features : BookmarkFeatures ) -> ClassificationResult :
386409 if not results :
@@ -406,18 +429,11 @@ def _ensemble_classification(self, results: List[ClassificationResult], features
406429 }
407430
408431 for res in results :
409- if isinstance (res , dict ):
410- method = res .get ('method' , 'unknown' )
411- category = self ._normalize_category_string (res .get ('category' , '未分类' )) or '未分类'
412- confidence = res .get ('confidence' , 0.0 )
413- reasoning = res .get ('reasoning' , [])
414- facets = res .get ('facets' , {}) or {}
415- else :
416- method = res .method
417- category = self ._normalize_category_string (res .category ) or '未分类'
418- confidence = res .confidence
419- reasoning = res .reasoning
420- facets = getattr (res , 'facets' , {}) or {}
432+ method = res .method
433+ category = self ._normalize_category_string (res .category ) or '未分类'
434+ confidence = res .confidence
435+ reasoning = res .reasoning
436+ facets = res .facets or {}
421437
422438 weight = method_weights .get (method , 0.1 )
423439 category_scores [category ] += confidence * weight
0 commit comments