1515from pathlib import Path
1616
1717import dask .dataframe as dd
18+ import numpy as np
1819from natsort import natsorted
1920
2021from sed .core .logging import set_verbosity
@@ -79,6 +80,21 @@ def verbose(self, verbose: bool):
7980 self ._verbose = verbose
8081 set_verbosity (logger , self ._verbose )
8182
def __len__(self) -> int:
    """
    Returns the total number of rows in the electron resolved dataframe.

    Returns:
        int: Total number of rows.

    Raises:
        KeyError: If the file statistics are missing (i.e. no dataframe was read yet).
    """
    try:
        per_file_stats = self.metadata["file_statistics"]["electron"]
    except KeyError as exc:
        raise KeyError("File statistics missing. Use 'read_dataframe' first.") from exc

    # Sum the per-file row counts recorded during dataframe creation.
    row_counts = (entry["num_rows"] for entry in per_file_stats.values())
    return sum(row_counts)
8298 def _initialize_dirs (self ) -> None :
8399 """
84100 Initializes the directories on Maxwell based on configuration. If paths is provided in
@@ -223,12 +239,57 @@ def parse_metadata(self, token: str = None) -> dict:
223239
224240 return metadata
225241
226- def get_count_rate (
227- self ,
228- fids : Sequence [int ] = None , # noqa: ARG002
229- ** kwds , # noqa: ARG002
230- ):
231- return None , None
def get_count_rate(self, fids=None, **kwds) -> tuple[np.ndarray, np.ndarray]:
    """
    Calculates the count rate using the number of rows and elapsed time for each file.
    Hence the resolution is not very high, but this method is very fast.

    Args:
        fids (Sequence[int]): A sequence of file IDs. Defaults to all files.

    Keyword Args:
        runs: A sequence of run IDs. Takes precedence over ``fids``.

    Returns:
        tuple[np.ndarray, np.ndarray]: The count rate and elapsed time in seconds.

    Raises:
        TypeError: If unexpected keyword arguments are passed.
        KeyError: If the file statistics are missing.
    """
    runs = kwds.pop("runs", None)
    if len(kwds) > 0:
        raise TypeError(f"get_count_rate() got unexpected keyword arguments {kwds.keys()}.")

    # Validate once up front instead of re-checking inside the per-file loop.
    try:
        file_statistics = self.metadata["file_statistics"]["electron"]
    except KeyError as exc:
        raise KeyError("File statistics missing. Use 'read_dataframe' first.") from exc

    if runs is not None:
        # Resolve run IDs to file indices; runs take precedence over fids.
        if self.raw_dir is None:
            self._initialize_dirs()
        fids = []
        for run_id in runs:
            files = self.get_files_from_run_id(run_id=run_id, folders=self.raw_dir)
            fids.extend(self.files.index(file) for file in files)
    elif fids is None:
        fids = range(len(self.files))

    # Per-file row counts and elapsed times; statistics are keyed by the stringified fid.
    all_counts = [file_statistics[str(fid)]["num_rows"] for fid in fids]
    elapsed_times = [self.get_elapsed_time(fids=[fid]) for fid in fids]

    count_rate = np.array(all_counts) / np.array(elapsed_times)
    seconds = np.cumsum(elapsed_times)
    return count_rate, seconds
232293
233294 def get_elapsed_time (self , fids : Sequence [int ] = None , ** kwds ) -> float | list [float ]: # type: ignore[override]
234295 """
@@ -254,7 +315,7 @@ def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float | list[f
254315 raise KeyError (
255316 "File statistics missing. Use 'read_dataframe' first." ,
256317 ) from exc
257- time_stamp_alias = self ._config ["dataframe" ].get ("time_stamp_alias " , "timeStamp" )
318+ time_stamp_alias = self ._config ["dataframe" ][ "columns" ] .get ("timestamp " , "timeStamp" )
258319
259320 def get_elapsed_time_from_fid (fid ):
260321 try :
@@ -407,7 +468,7 @@ def read_dataframe(
407468 self .metadata .update (self .parse_metadata (token ) if collect_metadata else {})
408469 self .metadata .update (bh .metadata )
409470
410- print (f"loading complete in { time .time () - t0 : .2f} s" )
471+ logger . info (f"Loading complete in { time .time () - t0 : .2f} s" )
411472
412473 return df , df_timed , self .metadata
413474
0 commit comments