2323# SOFTWARE.
2424#
2525###############################################################################
26+ import datetime
2627import re
27- from typing import Union
28+ from typing import Optional , Union
2829
2930from pydantic import BaseModel
3031
@@ -54,6 +55,75 @@ def count(self, val: int):
5455class RegexAnalyzer (DataAnalyzer [TDataModel , TAnalyzeArg ]):
5556 """Parent class for all regex based data analyzers."""
5657
58+ # Class variable for timestamp pattern - can be overridden in subclasses
59+ TIMESTAMP_PATTERN : re .Pattern = re .compile (r"(\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)" )
60+
61+ def _extract_timestamp_from_match_position (
62+ self , content : str , match_start : int
63+ ) -> Optional [str ]:
64+ """Extract timestamp from the line where a regex match starts.
65+
66+ Args:
67+ content (str): Full content being analyzed
68+ match_start (int): Start position of the regex match
69+
70+ Returns:
71+ Optional[str]: Extracted timestamp string or None
72+ """
73+ # Get the line where the match starts
74+ line_start = content .rfind ("\n " , 0 , match_start ) + 1
75+ line_end = content .find ("\n " , match_start )
76+ if line_end == - 1 :
77+ line_end = len (content )
78+
79+ first_line = content [line_start :line_end ]
80+
81+ # Extract timestamp from first line only using class pattern
82+ timestamp_match = self .TIMESTAMP_PATTERN .search (first_line )
83+ return timestamp_match .group (1 ) if timestamp_match else None
84+
85+ def _convert_and_extend_error_regex (
86+ self ,
87+ custom_regex : Optional [Union [list [ErrorRegex ], list [dict ]]],
88+ base_regex : list [ErrorRegex ],
89+ ) -> list [ErrorRegex ]:
90+ """Convert custom error patterns and extend base ERROR_REGEX.
91+
92+ Supports two input formats:
93+ - ErrorRegex objects directly
94+ - Dicts with regex/message/category/priority that get converted to ErrorRegex
95+
96+ Args:
97+ custom_regex: Optional list of custom error patterns (ErrorRegex objects or dicts)
98+ base_regex: Base list of ErrorRegex patterns to extend
99+
100+ Returns:
101+ Extended list of ErrorRegex objects (custom patterns + base patterns)
102+
103+ Example:
104+ custom = [
105+ {"regex": r"my-error.*", "message": "Custom error", "event_category": "SW_DRIVER"}
106+ ]
107+ extended = analyzer._convert_and_extend_error_regex(custom, analyzer.ERROR_REGEX)
108+ """
109+ if not custom_regex or not isinstance (custom_regex , list ):
110+ return list (base_regex )
111+
112+ converted_regex = []
113+ for item in custom_regex :
114+ if isinstance (item , ErrorRegex ):
115+ converted_regex .append (item )
116+ elif isinstance (item , dict ):
117+ # Convert dict to ErrorRegex
118+ item ["regex" ] = re .compile (item ["regex" ])
119+ if "event_category" in item :
120+ item ["event_category" ] = EventCategory (item ["event_category" ])
121+ if "event_priority" in item :
122+ item ["event_priority" ] = EventPriority (item ["event_priority" ])
123+ converted_regex .append (ErrorRegex (** item ))
124+
125+ return converted_regex + list (base_regex )
126+
57127 def _build_regex_event (
58128 self , regex_obj : ErrorRegex , match : Union [str , list [str ]], source : str
59129 ) -> RegexEvent :
@@ -82,15 +152,28 @@ def _build_regex_event(
82152 )
83153
84154 def check_all_regexes (
85- self , content : str , source : str , error_regex : list [ErrorRegex ], group = True
155+ self ,
156+ content : str ,
157+ source : str ,
158+ error_regex : list [ErrorRegex ],
159+ group : bool = True ,
160+ num_timestamps : int = 3 ,
161+ interval_to_collapse_event : int = 60 ,
86162 ) -> list [RegexEvent ]:
87163 """Iterate over all ERROR_REGEX and check content for any matches
88164
165+ Enhanced with timestamp-based event collapsing:
166+ - Extracts timestamps from matched lines
167+ - Collapses events within interval_to_collapse_event seconds
168+ - Prunes timestamp lists to keep first N and last N timestamps
169+
89170 Args:
90171 content (str): content to match regex on
91172 source (str): descriptor for content
92173 error_regex (list[ErrorRegex]): list of regex objects to match
93174 group (bool, optional): flag to control whether matches should be grouped together. Defaults to True.
175+ num_timestamps (int, optional): maximum number of timestamps to keep for each event. Defaults to 3.
176+ interval_to_collapse_event (int, optional): time interval in seconds to collapse events. Defaults to 60.
94177
95178 Returns:
96179 list[RegexEvent]: list of regex event objects
@@ -99,8 +182,39 @@ def check_all_regexes(
99182 regex_map : dict [str , RegexEvent ] = {}
100183 regex_event_list : list [RegexEvent ] = []
101184
185+ def _is_within_interval (new_timestamp_str : str , existing_timestamps : list [str ]) -> bool :
186+ """Check if new timestamp is within the specified interval of any existing timestamp"""
187+ try :
188+ new_dt = datetime .datetime .fromisoformat (new_timestamp_str .replace ("," , "." ))
189+ except Exception as e :
190+ self .logger .warning (
191+ f"WARNING: Failed to parse date from timestamp: { new_timestamp_str } . Error: { e } "
192+ )
193+ return False
194+
195+ if not new_dt :
196+ return False
197+
198+ for existing_ts in existing_timestamps :
199+ try :
200+ existing_dt = datetime .datetime .fromisoformat (existing_ts .replace ("," , "." ))
201+ if (
202+ existing_dt
203+ and abs ((new_dt - existing_dt ).total_seconds ()) < interval_to_collapse_event
204+ ):
205+ return True
206+ except Exception :
207+ continue
208+ return False
209+
102210 for error_regex_obj in error_regex :
103- for match in error_regex_obj .regex .findall (content ):
211+ for match_obj in error_regex_obj .regex .finditer (content ):
212+ # Extract timestamp from the line where match occurs
213+ timestamp = self ._extract_timestamp_from_match_position (content , match_obj .start ())
214+
215+ match = match_obj .groups () if match_obj .groups () else match_obj .group (0 )
216+
217+ # Process multi-line matches
104218 if isinstance (match , str ) and "\n " in match :
105219 match = match .strip ().split ("\n " )
106220
@@ -110,11 +224,52 @@ def check_all_regexes(
110224 if len (match ) == 1 :
111225 match = match [0 ]
112226
113- if group and str (match ) in regex_map :
114- regex_map [str (match )].count += 1
227+ # Create match key for grouping
228+ match_key = str (match )
229+
230+ if group and match_key in regex_map :
231+ # Increment count for existing match
232+ existing_event = regex_map [match_key ]
233+ existing_event .count += 1
234+
235+ # Add timestamp to timestamps list if we have one
236+ if timestamp :
237+ timestamps_list = existing_event .data .get ("timestamps" , [])
238+ # Check if new timestamp is within the specified interval of existing ones
239+ if not _is_within_interval (timestamp , timestamps_list ):
240+ timestamps_list .append (timestamp )
241+ existing_event .data ["timestamps" ] = timestamps_list
242+
115243 elif group :
116- regex_map [str (match )] = self ._build_regex_event (error_regex_obj , match , source )
244+ # Create new grouped event
245+ new_event = self ._build_regex_event (error_regex_obj , match , source )
246+
247+ # Add timestamp information
248+ if timestamp :
249+ new_event .data ["timestamps" ] = [timestamp ]
250+
251+ regex_map [match_key ] = new_event
252+
117253 else :
118- regex_event_list .append (self ._build_regex_event (error_regex_obj , match , source ))
254+ # Create individual event (no grouping)
255+ new_event = self ._build_regex_event (error_regex_obj , match , source )
256+
257+ # Add single timestamp
258+ if timestamp :
259+ new_event .data ["timestamp" ] = timestamp
260+
261+ regex_event_list .append (new_event )
262+
263+ all_events = list (regex_map .values ()) if group else regex_event_list
264+
265+ # Prune timestamp lists to keep only first N and last N timestamps
266+ for event in all_events :
267+ timestamps_list = event .data .get ("timestamps" , [])
268+ if isinstance (timestamps_list , list ) and len (timestamps_list ) > 2 * num_timestamps :
269+ # Keep first num_timestamps and last num_timestamps
270+ pruned_timestamps = (
271+ timestamps_list [:num_timestamps ] + timestamps_list [- num_timestamps :]
272+ )
273+ event .data ["timestamps" ] = pruned_timestamps
119274
120- return list ( regex_map . values ()) if group else regex_event_list
275+ return all_events
0 commit comments