From 459c402c1ac34ced3fbe0024e24b71476ba724a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20TORTEROTOT?= Date: Wed, 7 Jan 2026 17:29:22 +0100 Subject: [PATCH 01/33] add show_recording_off --- src/post_processing/dataclass/data_aplose.py | 4 +- .../dataclass/recording_period.py | 126 ++++++++++++++---- src/post_processing/utils/filtering_utils.py | 4 +- .../PAMGuardMatlab-main/.DS_Store | Bin 0 -> 6148 bytes .../PAMGuardMatlab-main/.MATLABDriveTag | 1 + .../pgmatlab/.MATLABDriveTag | 1 + .../pgmatlab/Array/.MATLABDriveTag | 1 + src/post_processing/utils/plot_utils.py | 77 +++++++++-- 8 files changed, 169 insertions(+), 45 deletions(-) create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag create mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index 98e6d9c..f604f80 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -393,7 +393,7 @@ def plot( color = kwargs.get("color") season = kwargs.get("season") effort = kwargs.get("effort") - + show_recording_OFF = kwargs.get("show_recording_OFF") if not bin_size: msg = "'bin_size' missing for histogram plot." raise ValueError(msg) @@ -409,7 +409,7 @@ def plot( color=color, season=season, effort=effort, - coordinates=(self.lat, self.lon), + coordinates=(self.lat, self.lon) ) if mode == "heatmap": diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 4c09722..15def1e 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -16,6 +16,8 @@ cut, read_csv, ) +from pandas.tseries.offsets import BaseOffset +import pandas as pd from post_processing.utils.core_utils import ( get_time_range_and_bin_size, @@ -33,42 +35,108 @@ @dataclass(frozen=True) class RecordingPeriod: - """A class to handle recording periods.""" - counts: Series timebin_origin: Timedelta @classmethod def from_path( cls, - config: DetectionFilter, - date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES, + config, *, bin_size: Timedelta | BaseOffset, - ) -> RecordingPeriod: - """Return a list of Timestamps corresponding to recording periods.""" + ) -> "RecordingPeriod": + """Vectorized creation of recording coverage from CSV with start/end datetimes. + + CSV must have columns 'start_recording' and 'end_recording'. + bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) or a pandas offset (e.g., "1D"). + """ + # 1. Read CSV and parse datetimes timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) - timestamp_df = read_csv(timestamp_file, delimiter=delim) - - if "timestamp" in timestamp_df.columns: - msg = "Parsing 'timestamp' column not implemented yet." 
- raise NotImplementedError(msg) - - if "filename" in timestamp_df.columns: - timestamps = [ - strptime_from_text(ts, date_format) - for ts in timestamp_df["filename"] - ] - timestamps = localize_timestamps(timestamps, config.timezone) - time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size) - - binned = cut(timestamps, time_vector) - max_annot = bin_size / config.timebin_origin - - return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot), - timebin_origin=config.timebin_origin, - ) - - msg = "Could not parse timestamps." - raise ValueError(msg) + df = pd.read_csv( + config.timestamp_file, + parse_dates=["start_recording", "end_recording"], + delimiter=delim + ) + + if df.empty: + raise ValueError("CSV is empty.") + + # 2. Normalize timezones if needed + df["start_recording"] = ( + pd.to_datetime(df["start_recording"], utc=True).dt.tz_convert(None) + ) + df["end_recording"] = ( + pd.to_datetime(df["end_recording"], utc=True).dt.tz_convert(None) + ) + + # Build fine-grained timeline (timebin_origin resolution) + origin = config.timebin_origin + time_index = pd.date_range( + start=df["start_recording"].min(), + end=df["end_recording"].max(), + freq=origin, + ) + + # Initialize effort vector + effort = pd.Series(0, index=time_index) + + # Vectorized interval coverage + tvals = time_index.values[:, None] + start_vals = df["start_recording"].values + end_vals = df["end_recording"].values + + covered = (tvals >= start_vals) & (tvals < end_vals) + effort[:] = covered.any(axis=1).astype(int) + + # Aggregate effort into bin_size + counts = effort.resample(bin_size).sum() + counts.index = pd.interval_range( + start=counts.index[0], + periods=len(counts), + freq=bin_size, + closed="left", + ) + return cls(counts=counts, timebin_origin=origin) + +# @dataclass(frozen=True) +# class RecordingPeriod: +# """A class to handle recording periods.""" +# +# counts: Series +# timebin_origin: Timedelta +# +# @classmethod +# def from_path( +# cls, +# config: DetectionFilter, +# date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES, +# *, +# bin_size: Timedelta | BaseOffset, +# ) -> RecordingPeriod: +# """Return a list of Timestamps corresponding to recording periods.""" +# timestamp_file = config.timestamp_file +# delim = find_delimiter(timestamp_file) +# timestamp_df = read_csv(timestamp_file, delimiter=delim) +# +# if "timestamp" in timestamp_df.columns: +# msg = "Parsing 'timestamp' column not implemented yet." +# raise NotImplementedError(msg) +# +# if "filename" in timestamp_df.columns: +# timestamps = [ +# strptime_from_text(ts, date_format) +# for ts in timestamp_df["filename"] +# ] +# timestamps = localize_timestamps(timestamps, config.timezone) +# time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size) +# +# binned = cut(timestamps, time_vector) +# max_annot = bin_size / config.timebin_origin +# +# return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot), +# timebin_origin=config.timebin_origin, +# ) +# +# msg = "Could not parse timestamps." +# raise ValueError(msg) diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index c391ff6..650b5a8 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -509,8 +509,8 @@ def reshape_timebin( timebin_new: Timedelta The size of the new time bin. timestamp_audio: list[Timestamp] - A list of Timestamp objects corresponding to the shape - in which the data should be reshaped. 
+ A list of Timestamp objects corresponding to the start of each wav + that corresponds to a detection Returns ------- diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8cbbca90b6e5166f54b08e62aaab675fe94f04f0 GIT binary patch literal 6148 zcmeHK%}T>S5Z-O8-BN@c6nb3nTCkQQl)(EZ>xnykjo@uf;KKT0MGoe+f+2zj`Rl2EONYMF$o&h?B#G(=p{2Emc4#&vuTKv)3b}~@k=~Q)SDrb1MganEEez%%1&Lc-Xe)rG6i3eSHu$% z1H=F^Kn&~;1NICM&HX8vDkTPpfgdt}`-6mr=vvGT>a7Dlczs5H4G{%&d`looi>}4o zAb3EyNd+{i+&(e5Ne8>MajwPOph;(3&kW<(nTy8@*Rz9N>Tt$ggY+c^h=FYes;2AU z`F{n!Oz9(kJB51005R~-7~r*$HyT1w_H5lL56@Z&?FkwR#^tDhfWCDJfDY~>E$uXZ bi8{o&7IT9*3fgr#AYBABA@m^zeu056q9RP< literal 0 HcmV?d00001 diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag new file mode 100644 index 0000000..84059a2 --- /dev/null +++ b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag @@ -0,0 +1 @@ +3496f669-9381-4974-bb7c-5cc1ddcb05d4 \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag new file mode 100644 index 0000000..df9fcd4 --- /dev/null +++ b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag @@ -0,0 +1 @@ +ee53bc03-ef5e-44bc-aea4-8fae1e2a0b9f \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag new file mode 100644 index 0000000..656b51a --- /dev/null +++ b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag @@ -0,0 +1 @@ +c4dd0a9d-e15d-496f-91ff-d9ff561a4fa0 \ No newline at end of file diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index f21e343..2746a85 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -13,7 +13,7 @@ from matplotlib.dates import num2date from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit -from pandas import DataFrame, DatetimeIndex, Index, Timedelta, Timestamp, date_range +from pandas import DataFrame, DatetimeIndex, Index, Timedelta, Timestamp, date_range, Series from pandas.tseries import frequencies from scipy.stats import pearsonr from seaborn import scatterplot @@ -107,8 +107,9 @@ def histo( else: legend_labels = None - if effort: - normalize_counts_by_effort(df, effort, time_bin) + # if effort: + # normalize_counts_by_effort(df, effort, time_bin) + n_groups = len(labels) if legend_labels else 1 bar_width = bin_size / n_groups @@ -128,6 +129,8 @@ def histo( bar_kwargs["label"] = legend_labels[i] ax.bar(bin_starts + offset, df.iloc[:, i], **bar_kwargs) + if kwargs.get("show_recording_OFF"): + ax.set_facecolor("lightgrey") if len(df.columns) > 1 and legend: ax.legend(labels=legend_labels, bbox_to_anchor=(1.01, 1), loc="upper left") @@ -138,7 +141,7 @@ def histo( f" - bin size: {bin_size_str})" ) 
ax.set_ylabel(y_label) - set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) + #set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) set_plot_title(ax, annotators, labels) ax.set_xlim(begin, end) @@ -659,20 +662,70 @@ def shade_no_effort( """ + """Shade areas of the plot where no observation effort was made.""" width_days = bar_width.total_seconds() / 86400 - no_effort_bins = bin_starts[observed.counts.reindex(bin_starts) == 0] - for ts in no_effort_bins: - start = mdates.date2num(ts) - ax.axvspan(start, start + width_days, color="grey", alpha=0.08, zorder=1.5) + # Convert effort IntervalIndex → DatetimeIndex (bin starts) + effort_by_start = Series( + observed.counts.values, + index=[i.left for i in observed.counts.index], + ).tz_localize("UTC") + + # Align effort to plotting bins + effort_aligned = effort_by_start.reindex(bin_starts) + max_effort = bar_width / observed.timebin_origin + effort_fraction = effort_aligned / max_effort + + no_effort = effort_fraction == 0 + partial_effort = (effort_fraction > 0) & (effort_fraction < 1) + # Draw partial effort first (lighter) + for ts in bin_starts[partial_effort]: + start = mdates.date2num(ts - bar_width) + ax.axvspan( + start, + start + width_days, + facecolor="0.65", + alpha=0.08, + linewidth=0, + zorder=0, + ) + + # Draw no effort on top (darker) + for ts in bin_starts[no_effort]: + start = mdates.date2num(ts - bar_width) + ax.axvspan( + start, + start + width_days, + facecolor="0.45", + alpha=0.15, + linewidth=0, + zorder=0, + ) + + # Outside data coverage x_min, x_max = ax.get_xlim() - data_min = mdates.date2num(bin_starts[0]) - data_max = mdates.date2num(bin_starts[-1]) + width_days + data_min = mdates.date2num(bin_starts[0] - bar_width) + data_max = mdates.date2num(bin_starts[-1] + bar_width) if x_min < data_min: - ax.axvspan(x_min, data_min, color="grey", alpha=0.08, zorder=1.5) + ax.axvspan( + x_min, + data_min, + facecolor="0.45", + alpha=0.15, + linewidth=0, + zorder=0, + ) if x_max > data_max: - ax.axvspan(data_max, x_max, color="grey", alpha=0.08, zorder=1.5) + ax.axvspan( + data_max, + x_max, + facecolor="0.45", + alpha=0.15, + linewidth=0, + zorder=0, + ) + ax.set_xlim(x_min, x_max) From b8a38764ce04b065dc67ae30523ae0c1fa6c1865 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:37:01 +0100 Subject: [PATCH 02/33] fix_ruff --- src/post_processing/dataclass/data_aplose.py | 1 - .../dataclass/recording_period.py | 66 ++----------------- 2 files changed, 7 insertions(+), 60 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index f604f80..76b6d98 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -393,7 +393,6 @@ def plot( color = kwargs.get("color") season = kwargs.get("season") effort = kwargs.get("effort") - show_recording_OFF = kwargs.get("show_recording_OFF") if not bin_size: msg = "'bin_size' missing for histogram plot." 
raise ValueError(msg) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 15def1e..901d932 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -8,21 +8,12 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from osekit.config import TIMESTAMP_FORMATS_EXPORTED_FILES -from osekit.utils.timestamp_utils import strptime_from_text +import pandas as pd from pandas import ( Series, Timedelta, - cut, - read_csv, ) -from pandas.tseries.offsets import BaseOffset -import pandas as pd -from post_processing.utils.core_utils import ( - get_time_range_and_bin_size, - localize_timestamps, -) from post_processing.utils.filtering_utils import ( find_delimiter, ) @@ -30,8 +21,6 @@ if TYPE_CHECKING: from pandas.tseries.offsets import BaseOffset - from post_processing.dataclass.detection_filter import DetectionFilter - @dataclass(frozen=True) class RecordingPeriod: @@ -44,19 +33,20 @@ def from_path( config, *, bin_size: Timedelta | BaseOffset, - ) -> "RecordingPeriod": - """Vectorized creation of recording coverage from CSV with start/end datetimes. + ) -> RecordingPeriod: + """Vectorized creation of recording coverage from CSV with start/end datetime. CSV must have columns 'start_recording' and 'end_recording'. - bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) or a pandas offset (e.g., "1D"). + bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) + or a pandas offset (e.g., "1D"). """ - # 1. Read CSV and parse datetimes + # 1. Read CSV and parse datetime timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) df = pd.read_csv( config.timestamp_file, parse_dates=["start_recording", "end_recording"], - delimiter=delim + delimiter=delim, ) if df.empty: @@ -98,45 +88,3 @@ def from_path( closed="left", ) return cls(counts=counts, timebin_origin=origin) - -# @dataclass(frozen=True) -# class RecordingPeriod: -# """A class to handle recording periods.""" -# -# counts: Series -# timebin_origin: Timedelta -# -# @classmethod -# def from_path( -# cls, -# config: DetectionFilter, -# date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES, -# *, -# bin_size: Timedelta | BaseOffset, -# ) -> RecordingPeriod: -# """Return a list of Timestamps corresponding to recording periods.""" -# timestamp_file = config.timestamp_file -# delim = find_delimiter(timestamp_file) -# timestamp_df = read_csv(timestamp_file, delimiter=delim) -# -# if "timestamp" in timestamp_df.columns: -# msg = "Parsing 'timestamp' column not implemented yet." -# raise NotImplementedError(msg) -# -# if "filename" in timestamp_df.columns: -# timestamps = [ -# strptime_from_text(ts, date_format) -# for ts in timestamp_df["filename"] -# ] -# timestamps = localize_timestamps(timestamps, config.timezone) -# time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size) -# -# binned = cut(timestamps, time_vector) -# max_annot = bin_size / config.timebin_origin -# -# return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot), -# timebin_origin=config.timebin_origin, -# ) -# -# msg = "Could not parse timestamps." 
-# raise ValueError(msg) From 639c6d6d8f6fc0f738b2a6ce37ee47fbd4407ccc Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:38:51 +0100 Subject: [PATCH 03/33] fix_ruff2 --- src/post_processing/dataclass/data_aplose.py | 2 +- src/post_processing/utils/core_utils.py | 10 ++++---- src/post_processing/utils/plot_utils.py | 16 +++++++++---- tests/conftest.py | 2 -- tests/test_DataAplose.py | 9 +++++--- tests/test_core_utils.py | 19 +++++++++------- tests/test_filtering_utils.py | 24 +++++++++++--------- tests/test_glider_utils.py | 2 +- tests/test_metric_utils.py | 3 ++- tests/test_plot_utils.py | 6 ++--- 10 files changed, 52 insertions(+), 41 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index 76b6d98..3c8d1e4 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -408,7 +408,7 @@ def plot( color=color, season=season, effort=effort, - coordinates=(self.lat, self.lon) + coordinates=(self.lat, self.lon), ) if mode == "heatmap": diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 5a831e1..9457ef2 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -11,7 +11,7 @@ from astral.sun import sunrise, sunset from matplotlib import pyplot as plt from osekit.config import TIMESTAMP_FORMAT_AUDIO_FILE -from osekit.utils.timestamp_utils import strptime_from_text, strftime_osmose_format +from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text from pandas import ( DataFrame, DatetimeIndex, @@ -255,7 +255,6 @@ def add_weak_detection( new_line.append(np.nan) df.loc[df.index.max() + 1] = new_line - return df.sort_values(by=["start_datetime", "annotator"]).reset_index(drop=True) @@ -509,11 +508,10 @@ def get_time_range_and_bin_size( if isinstance(bin_size, Timedelta): return timestamp_range, bin_size - elif isinstance(bin_size, BaseOffset): + if isinstance(bin_size, BaseOffset): return timestamp_range, timestamp_range[1] - timestamp_range[0] - else: - msg = "bin_size must be a Timedelta or BaseOffset." - raise TypeError(msg) + msg = "bin_size must be a Timedelta or BaseOffset." 
+ raise TypeError(msg) def round_begin_end_timestamps( diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 2746a85..b6f566e 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -13,7 +13,15 @@ from matplotlib.dates import num2date from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit -from pandas import DataFrame, DatetimeIndex, Index, Timedelta, Timestamp, date_range, Series +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + Timestamp, + date_range, +) from pandas.tseries import frequencies from scipy.stats import pearsonr from seaborn import scatterplot @@ -28,11 +36,10 @@ timedelta_to_str, ) from post_processing.utils.filtering_utils import ( + filter_by_annotator, get_max_time, get_timezone, - filter_by_annotator, ) -from post_processing.utils.metrics_utils import normalize_counts_by_effort if TYPE_CHECKING: from datetime import tzinfo @@ -110,7 +117,6 @@ def histo( # if effort: # normalize_counts_by_effort(df, effort, time_bin) - n_groups = len(labels) if legend_labels else 1 bar_width = bin_size / n_groups bin_starts = mdates.date2num(df.index) @@ -141,7 +147,7 @@ def histo( f" - bin size: {bin_size_str})" ) ax.set_ylabel(y_label) - #set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) + # set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) set_plot_title(ax, annotators, labels) ax.set_xlim(begin, end) diff --git a/tests/conftest.py b/tests/conftest.py index e03bf43..99ff7ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,8 +122,6 @@ """ - - STATUS = """dataset,filename,ann1,ann2,ann3,ann4,ann5,ann6 sample_dataset,2025_01_25_06_20_00,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED sample_dataset,2025_01_25_06_20_10,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED diff --git a/tests/test_DataAplose.py b/tests/test_DataAplose.py index 5ad1b04..9b9516c 100644 --- a/tests/test_DataAplose.py +++ b/tests/test_DataAplose.py @@ -19,6 +19,7 @@ def test_data_aplose_init(sample_df: DataFrame) -> None: assert data.begin == sample_df["start_datetime"].min() assert data.end == sample_df["end_datetime"].max() + def test_filter_df_single_pair(sample_df: DataFrame) -> None: data = DataAplose(sample_df) filtered_data = data.filter_df(annotator="ann1", label="lbl1") @@ -30,17 +31,19 @@ def test_filter_df_single_pair(sample_df: DataFrame) -> None: ].reset_index(drop=True) assert filtered_data.equals(expected) + def test_change_tz(sample_df: DataFrame) -> None: data = DataAplose(sample_df) - new_tz = 'Etc/GMT-7' + new_tz = "Etc/GMT-7" data.change_tz(new_tz) - start_dt = data.df['start_datetime'] - end_dt = data.df['end_datetime'] + start_dt = data.df["start_datetime"] + end_dt = data.df["end_datetime"] assert all(ts.tz.zone == new_tz for ts in start_dt), f"The detection start timestamps have to be in {new_tz} timezone" assert all(ts.tz.zone == new_tz for ts in end_dt), f"The detection end timestamps have to be in {new_tz} timezone" assert data.begin.tz.zone == new_tz, f"The begin value of the DataAplose has to be in {new_tz} timezone" assert data.end.tz.zone == new_tz, f"The end value of the DataAplose has to be in {new_tz} timezone" + def test_filter_df_multiple_pairs(sample_df: DataFrame) -> None: data = DataAplose(sample_df) filtered_data = data.filter_df(annotator=["ann1", "ann2"], label=["lbl1", "lbl2"]) diff --git a/tests/test_core_utils.py b/tests/test_core_utils.py index 
a1a3d73..e72e482 100644 --- a/tests/test_core_utils.py +++ b/tests/test_core_utils.py @@ -8,6 +8,8 @@ from post_processing.dataclass.data_aplose import DataAplose from post_processing.utils.core_utils import ( + add_recording_period, + add_season_period, add_weak_detection, get_coordinates, get_count, @@ -15,13 +17,11 @@ get_season, get_sun_times, get_time_range_and_bin_size, + json2df, localize_timestamps, round_begin_end_timestamps, - timedelta_to_str, - add_season_period, - add_recording_period, set_bar_height, - json2df, + timedelta_to_str, ) @@ -409,10 +409,11 @@ def test_add_season_no_data() -> None: # %% add_recording_period + def test_add_recording_period_valid() -> None: fig, ax = plt.subplots() start = Timestamp("2025-01-01T00:00:00+00:00") - stop = Timestamp("2025-01-02T00:00:00+00:00") + stop = Timestamp("2025-01-02T00:00:00+00:00") ts = date_range(start=start, end=stop, freq="H", tz="UTC") values = list(range(len(ts))) @@ -423,7 +424,7 @@ def test_add_recording_period_valid() -> None: [ Timestamp("2025-01-01T00:00:00+00:00"), Timestamp("2025-01-02T00:00:00+00:00"), - ] + ], ], columns=["deployment_date", "recovery_date"], ) @@ -438,6 +439,7 @@ def test_add_recording_period_no_data() -> None: # %% set_bar_height + def test_set_bar_height_valid() -> None: fig, ax = plt.subplots() start = Timestamp("2025-01-01T00:00:00+00:00") @@ -457,6 +459,7 @@ def test_set_bar_height_no_data() -> None: # %% json2df + def test_json2df_valid(tmp_path): fake_json = { "deployment_date": "2025-01-01T00:00:00+00:00", @@ -474,9 +477,9 @@ def test_json2df_valid(tmp_path): [ Timestamp("2025-01-01T00:00:00+00:00"), Timestamp("2025-01-02T00:00:00+00:00"), - ] + ], ], columns=["deployment_date", "recovery_date"], ) - assert df.equals(expected) \ No newline at end of file + assert df.equals(expected) diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py index 95fd987..3ec3760 100644 --- a/tests/test_filtering_utils.py +++ b/tests/test_filtering_utils.py @@ -77,7 +77,7 @@ def test_find_delimiter_unsupported_delimiter(tmp_path: Path) -> None: with pytest.raises( ValueError, - match=r"unsupported delimiter '&'" + match=r"unsupported delimiter '&'", ): find_delimiter(file) @@ -199,6 +199,7 @@ def test_filter_by_freq_valid(sample_df: DataFrame, f_min, f_max): if f_max is not None: assert (result["end_frequency"] <= f_max).all() + @pytest.mark.parametrize( "f_min, f_max, expected_msg", [ @@ -216,8 +217,6 @@ def test_filter_by_freq_valid(sample_df: DataFrame, f_min, f_max): ), ], ) - - def test_filter_by_freq_out_of_range(sample_df: DataFrame, f_min, f_max, expected_msg): with pytest.raises(ValueError, match=expected_msg): filter_by_freq(sample_df, f_min=f_min, f_max=f_max) @@ -331,7 +330,7 @@ def test_get_timezone_several(sample_df: DataFrame) -> None: } sample_df = concat( [sample_df, DataFrame([new_row])], - ignore_index=False + ignore_index=False, ) tz = get_timezone(sample_df) assert len(tz) == 2 @@ -340,6 +339,7 @@ def test_get_timezone_several(sample_df: DataFrame) -> None: # %% read DataFrame + def test_read_dataframe_comma_delimiter(tmp_path: Path) -> None: csv_file = tmp_path / "test.csv" csv_file.write_text( @@ -417,7 +417,7 @@ def test_no_timebin_several_tz(sample_df: DataFrame) -> None: } sample_df = concat( [sample_df, DataFrame([new_row])], - ignore_index=False + ignore_index=False, ) timestamp_wav = to_datetime(sample_df["filename"], format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(pytz.UTC) @@ -429,7 +429,7 @@ def test_no_timebin_original_timebin(sample_df: DataFrame) -> None: tz = 
get_timezone(sample_df) timestamp_wav = to_datetime( sample_df["filename"], - format="%Y_%m_%d_%H_%M_%S" + format="%Y_%m_%d_%H_%M_%S", ).dt.tz_localize(tz) df_out = reshape_timebin( sample_df, @@ -520,7 +520,7 @@ def test_simple_reshape_hourly(sample_df: DataFrame) -> None: tz = get_timezone(sample_df) timestamp_wav = to_datetime( sample_df["filename"], - format="%Y_%m_%d_%H_%M_%S" + format="%Y_%m_%d_%H_%M_%S", ).dt.tz_localize(tz) df_out = reshape_timebin( sample_df, @@ -538,7 +538,7 @@ def test_reshape_daily_multiple_bins(sample_df: DataFrame) -> None: tz = get_timezone(sample_df) timestamp_wav = to_datetime( sample_df["filename"], - format="%Y_%m_%d_%H_%M_%S" + format="%Y_%m_%d_%H_%M_%S", ).dt.tz_localize(tz) df_out = reshape_timebin(sample_df, timestamp_audio=timestamp_wav, timebin_new=Timedelta(days=1)) assert not df_out.empty @@ -555,7 +555,7 @@ def test_with_manual_timestamps_vector(sample_df: DataFrame) -> None: df_out = reshape_timebin( sample_df, timestamp_audio=timestamp_wav, - timebin_new=Timedelta(hours=1) + timebin_new=Timedelta(hours=1), ) assert not df_out.empty @@ -589,6 +589,7 @@ def test_ensure_no_invalid_with_elements() -> None: assert "bar" in str(exc_info.value) assert "columns" in str(exc_info.value) + def test_ensure_no_invalid_single_element() -> None: invalid_items = ["baz"] with pytest.raises(ValueError) as exc_info: @@ -598,6 +599,7 @@ def test_ensure_no_invalid_single_element() -> None: # %% intersection / union + def test_intersection(sample_df) -> None: df_result = intersection_or_union(sample_df[sample_df["annotator"].isin(["ann1", "ann2"])], user_sel="intersection") @@ -628,7 +630,7 @@ def test_not_enough_annotators_raises() -> None: "annotation": ["cat"], "start_datetime": to_datetime(["2025-01-01 10:00"]), "end_datetime": to_datetime(["2025-01-01 10:01"]), - "annotator": ["A"] + "annotator": ["A"], }) with pytest.raises(ValueError, match="Not enough annotators detected"): - intersection_or_union(df_single_annotator, user_sel="intersection") \ No newline at end of file + intersection_or_union(df_single_annotator, user_sel="intersection") diff --git a/tests/test_glider_utils.py b/tests/test_glider_utils.py index 12d83df..d0247c5 100644 --- a/tests/test_glider_utils.py +++ b/tests/test_glider_utils.py @@ -56,7 +56,7 @@ def test_get_position_from_timestamp(nav_df: DataFrame) -> None: def test_plot_detections_with_nav_data( df_detections: DataFrame, - nav_df: DataFrame + nav_df: DataFrame, ) -> None: plot_detections_with_nav_data( df=df_detections, diff --git a/tests/test_metric_utils.py b/tests/test_metric_utils.py index 34ce769..35717e7 100644 --- a/tests/test_metric_utils.py +++ b/tests/test_metric_utils.py @@ -3,6 +3,7 @@ from post_processing.utils.metrics_utils import detection_perf + def test_detection_perf(sample_df: DataFrame) -> None: try: detection_perf(df=sample_df[sample_df["annotator"].isin(["ann1", "ann4"])], ref=("ann1", "lbl1")) @@ -12,4 +13,4 @@ def test_detection_perf(sample_df: DataFrame) -> None: def test_detection_perf_one_annotator(sample_df: DataFrame) -> None: with pytest.raises(ValueError, match="Two annotators needed"): - detection_perf(df=sample_df[sample_df["annotator"] == "ann1"], ref=("ann1", "lbl1")) \ No newline at end of file + detection_perf(df=sample_df[sample_df["annotator"] == "ann1"], ref=("ann1", "lbl1")) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index d7392cf..e7a389c 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -4,10 +4,10 @@ from numpy import arange, testing from 
post_processing.utils.plot_utils import ( - overview, _wrap_xtick_labels, - set_y_axis_to_percentage, get_legend, + overview, + set_y_axis_to_percentage, ) @@ -103,4 +103,4 @@ def test_lists_and_strings_combined(): labels = ["Label1", "Label2"] result = get_legend(annotators, labels) expected = ["Alice\nLabel1", "Bob\nLabel2"] - assert result == expected \ No newline at end of file + assert result == expected From b2ebfd71485c864ea7f3f5fd5a7f526e1d43d5ce Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Fri, 9 Jan 2026 13:50:54 +0100 Subject: [PATCH 04/33] adapt RecordingPeriod class --- .../dataclass/recording_period.py | 98 +++++++++++++++---- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 901d932..11524e6 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -24,6 +24,8 @@ @dataclass(frozen=True) class RecordingPeriod: + """Represents recording effort over time, aggregated into bins.""" + counts: Series timebin_origin: Timedelta @@ -34,33 +36,91 @@ def from_path( *, bin_size: Timedelta | BaseOffset, ) -> RecordingPeriod: - """Vectorized creation of recording coverage from CSV with start/end datetime. + """Vectorized creation of recording coverage from CSV with start/end datetimes. + + This method reads a CSV with columns: + - 'start_recording' + - 'end_recording' + - 'start_deployment' + - 'end_deployment' + + It computes the **effective recording interval** as the intersection between + recording and deployment periods, builds a fine-grained timeline at + `timebin_origin` resolution, and aggregates effort into `bin_size` bins. + + Parameters + ---------- + config + Configuration object containing at least: + - `timestamp_file`: path to CSV + - `timebin_origin`: Timedelta resolution of detections + bin_size : Timedelta or BaseOffset + Size of the aggregation bin (e.g., pd.Timedelta("1H") or "1D"). + + Returns + ------- + RecordingPeriod + Object containing `counts` (Series indexed by IntervalIndex) and + `timebin_origin`. - CSV must have columns 'start_recording' and 'end_recording'. - bin_size can be a Timedelta (e.g., pd.Timedelta("1H")) - or a pandas offset (e.g., "1D"). """ - # 1. Read CSV and parse datetime + # 1. Read CSV and parse datetime columns timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) df = pd.read_csv( config.timestamp_file, - parse_dates=["start_recording", "end_recording"], + parse_dates=[ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ], delimiter=delim, ) if df.empty: raise ValueError("CSV is empty.") - # 2. Normalize timezones if needed - df["start_recording"] = ( - pd.to_datetime(df["start_recording"], utc=True).dt.tz_convert(None) - ) - df["end_recording"] = ( - pd.to_datetime(df["end_recording"], utc=True).dt.tz_convert(None) - ) + # Ensure all required columns are present + required_columns = { + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + } + + missing = required_columns - set(df.columns) + + if missing: + raise ValueError( + f"CSV is missing required columns: {', '.join(sorted(missing))}", + ) + + # 2. 
Normalize timezones: convert to UTC, then remove tz info (naive) + for col in [ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ]: + df[col] = pd.to_datetime(df[col], utc=True).dt.tz_convert(None) + + # 3. Compute effective recording intervals (intersection) + df["start_recording"] = df[ + ["start_recording", "start_deployment"] + ].max(axis=1) + + df["end_recording"] = df[ + ["end_recording", "end_deployment"] + ].min(axis=1) + + # Remove rows with no actual recording interval + df = df.loc[df["start_recording"] < df["end_recording"]].copy() - # Build fine-grained timeline (timebin_origin resolution) + if df.empty: + raise ValueError("No valid recording intervals after deployment intersection.") + + # 4. Build fine-grained timeline at `timebin_origin` resolution origin = config.timebin_origin time_index = pd.date_range( start=df["start_recording"].min(), @@ -68,19 +128,23 @@ def from_path( freq=origin, ) - # Initialize effort vector + # Initialize effort vector (0 = no recording, 1 = recording) + # Compare each timestamp to all intervals in a vectorized manner effort = pd.Series(0, index=time_index) - # Vectorized interval coverage + # 5. Vectorized interval coverage tvals = time_index.values[:, None] start_vals = df["start_recording"].values end_vals = df["end_recording"].values + # Boolean matrix: True if timestamp is within any recording interval covered = (tvals >= start_vals) & (tvals < end_vals) effort[:] = covered.any(axis=1).astype(int) - # Aggregate effort into bin_size + # 6. Aggregate effort into user-defined bin_size counts = effort.resample(bin_size).sum() + + # Replace index with IntervalIndex for downstream compatibility counts.index = pd.interval_range( start=counts.index[0], periods=len(counts), From f74587e880facfb01d71bb350d11b24b43bd5703 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Mon, 12 Jan 2026 15:37:47 +0100 Subject: [PATCH 05/33] add test_recording_preiod --- tests/conftest.py | 28 ++++++++++++++- tests/test_recording_period.py | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 tests/test_recording_period.py diff --git a/tests/conftest.py b/tests/conftest.py index 99ff7ef..9a26f97 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ import soundfile as sf import yaml from osekit.utils.timestamp_utils import strftime_osmose_format -from pandas import DataFrame, read_csv +from pandas import DataFrame, Timedelta, read_csv SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11 @@ -132,6 +132,14 @@ sample_dataset,2025_01_26_06_20_20,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED,FINISHED """ +# --------------------------------------------------------------------------- +# Fake recording planning CSV used for tests +# --------------------------------------------------------------------------- +RECORDING_PLANNING_CSV = """start_recording,end_recording,start_deployment,end_deployment +2024-01-01 00:00:00+0000,2024-04-09 02:00:00+0000,2024-01-02 00:00:00+0000,2024-04-30 02:00:00+0000 +2024-04-30 01:00:00+0000,2024-07-03 06:00:00+0000,2024-04-09 04:00:00+0000,2024-07-14 14:00:00+0000 +""" + @pytest.fixture def sample_df() -> DataFrame: @@ -226,3 +234,21 @@ def create_file(path: Path, size: int = 
2048): create_file(nested / "file4.wav") (tmp_path / "ignore.txt").write_text("not audio") return tmp_path + + +@pytest.fixture +def recording_planning_csv(tmp_path) -> Path: + """Create a temporary CSV file simulating a recording planning.""" + path = tmp_path / "recording_planning.csv" + path.write_text(RECORDING_PLANNING_CSV) + return path + + +@pytest.fixture +def recording_planning_config(recording_planning_csv): + """Minimal config object compatible with RecordingPeriod.from_path.""" + class RecordingPlanningConfig: + timestamp_file: Path = recording_planning_csv + timebin_origin = Timedelta("1min") + + return RecordingPlanningConfig() diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py new file mode 100644 index 0000000..12464e7 --- /dev/null +++ b/tests/test_recording_period.py @@ -0,0 +1,63 @@ +import pandas as pd + +from post_processing.dataclass.recording_period import RecordingPeriod + + +def test_recording_period_with_gaps(recording_planning_config): + """RecordingPeriod correctly represents long gaps with no recording effort. + + The planning contains two recording blocks separated by ~3 weeks with no + recording at all. Weekly aggregation must reflect: + - weeks with full effort, + - weeks with partial effort, + - weeks with zero effort. + """ + recording_period = RecordingPeriod.from_path( + config=recording_planning_config, + bin_size="1W", + ) + + counts = recording_period.counts + + # ------------------------------------------------------------------ + # Structural checks + # ------------------------------------------------------------------ + assert not counts.empty + assert counts.index.is_interval() + assert counts.min() >= 0 + + # One week = 7 * 24 hours (origin = 1min) + full_week_minutes = 7 * 24 * 60 + + # ------------------------------------------------------------------ + # Helper: find the bin covering a given timestamp + # ------------------------------------------------------------------ + def bin_covering(ts: pd.Timestamp) -> pd.Interval: + for interval in counts.index: + if interval.left <= ts < interval.right: + return interval + raise AssertionError(f"No bin covers timestamp {ts}") + + # ------------------------------------------------------------------ + # Week fully inside the long gap → zero effort + # ------------------------------------------------------------------ + gap_ts = pd.Timestamp("2024-04-21") + + gap_bin = bin_covering(gap_ts) + assert counts.loc[gap_bin] == 0 + + # ------------------------------------------------------------------ + # Week fully inside recording → full effort + # ------------------------------------------------------------------ + full_effort_ts = pd.Timestamp("2024-02-04") + + full_bin = bin_covering(full_effort_ts) + assert counts.loc[full_bin] == full_week_minutes + + # ------------------------------------------------------------------ + # Week overlapping recording stop → partial effort + # ------------------------------------------------------------------ + partial_ts = pd.Timestamp("2024-04-14") + + partial_bin = bin_covering(partial_ts) + assert counts.loc[partial_bin] == 1560 From c1600bf894333969ca8510417f304d8c9fa49437 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Mon, 12 Jan 2026 17:15:20 +0100 Subject: [PATCH 06/33] add test_shade_no_effort --- tests/conftest.py | 7 +++--- tests/test_plot_utils.py | 40 ++++++++++++++++++++++++++++++++++ tests/test_recording_period.py | 3 ++- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git 
a/tests/conftest.py b/tests/conftest.py index 9a26f97..a6299e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,8 @@ import soundfile as sf import yaml from osekit.utils.timestamp_utils import strftime_osmose_format -from pandas import DataFrame, Timedelta, read_csv +from pandas import DataFrame, read_csv +from pandas.tseries import frequencies SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11 @@ -137,7 +138,7 @@ # --------------------------------------------------------------------------- RECORDING_PLANNING_CSV = """start_recording,end_recording,start_deployment,end_deployment 2024-01-01 00:00:00+0000,2024-04-09 02:00:00+0000,2024-01-02 00:00:00+0000,2024-04-30 02:00:00+0000 -2024-04-30 01:00:00+0000,2024-07-03 06:00:00+0000,2024-04-09 04:00:00+0000,2024-07-14 14:00:00+0000 +2024-04-30 01:00:00+0000,2024-07-14 06:00:00+0000,2024-04-30 02:00:00+0000,2024-07-06 14:00:00+0000 """ @@ -249,6 +250,6 @@ def recording_planning_config(recording_planning_csv): """Minimal config object compatible with RecordingPeriod.from_path.""" class RecordingPlanningConfig: timestamp_file: Path = recording_planning_csv - timebin_origin = Timedelta("1min") + timebin_origin = frequencies.to_offset("1min") return RecordingPlanningConfig() diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index e7a389c..37c2243 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -1,13 +1,19 @@ +from unittest.mock import MagicMock + import matplotlib.pyplot as plt import pytest from matplotlib.ticker import PercentFormatter from numpy import arange, testing +from pandas import Series, Timedelta, to_datetime +from pandas.tseries import frequencies +from post_processing.dataclass.recording_period import RecordingPeriod from post_processing.utils.plot_utils import ( _wrap_xtick_labels, get_legend, overview, set_y_axis_to_percentage, + shade_no_effort, ) @@ -104,3 +110,37 @@ def test_lists_and_strings_combined(): result = get_legend(annotators, labels) expected = ["Alice\nLabel1", "Bob\nLabel2"] assert result == expected + + +def test_shade_no_effort_from_recording_planning(recording_planning_config): + """shade_no_effort shades contiguous zero-effort periods.""" + + def count_contiguous_zero_segments(effort: Series) -> int: + """Return number of contiguous zero-effort segments.""" + is_zero = effort == 0 + return ((is_zero != is_zero.shift(fill_value=False)) & is_zero).sum() + + recording_period = RecordingPeriod.from_path( + config=recording_planning_config, + bin_size=frequencies.to_offset("1W"), + ) + + counts = recording_period.counts + + bin_starts = to_datetime( + [interval.left for interval in counts.index], + ) + + fig, ax = plt.subplots() + ax.axvspan = MagicMock() + + shade_no_effort( + ax=ax, + bin_starts=bin_starts, + observed=recording_period, + bar_width=Timedelta("7D"), + ) + + expected_spans = count_contiguous_zero_segments(counts) + + assert ax.axvspan.call_count == expected_spans diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py index 12464e7..41b9e59 100644 --- a/tests/test_recording_period.py +++ b/tests/test_recording_period.py @@ -1,4 +1,5 @@ import pandas as pd +from pandas.tseries import frequencies from post_processing.dataclass.recording_period import RecordingPeriod @@ -14,7 +15,7 @@ def 
test_recording_period_with_gaps(recording_planning_config): """ recording_period = RecordingPeriod.from_path( config=recording_planning_config, - bin_size="1W", + bin_size=frequencies.to_offset("1W"), ) counts = recording_period.counts From 0e22298596aeca66821bc6bbca16df72231f40fa Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Tue, 13 Jan 2026 09:47:44 +0100 Subject: [PATCH 07/33] delete useluss in shade_no_effort --- src/post_processing/utils/plot_utils.py | 26 ------------------------- 1 file changed, 26 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index b6f566e..75c421f 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -708,32 +708,6 @@ def shade_no_effort( zorder=0, ) - # Outside data coverage - x_min, x_max = ax.get_xlim() - data_min = mdates.date2num(bin_starts[0] - bar_width) - data_max = mdates.date2num(bin_starts[-1] + bar_width) - - if x_min < data_min: - ax.axvspan( - x_min, - data_min, - facecolor="0.45", - alpha=0.15, - linewidth=0, - zorder=0, - ) - if x_max > data_max: - ax.axvspan( - data_max, - x_max, - facecolor="0.45", - alpha=0.15, - linewidth=0, - zorder=0, - ) - - ax.set_xlim(x_min, x_max) - def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: """Display sunrise/sunset times on plot.""" From e889c29cc52a864f29363257128b9f9dd55e2a59 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Tue, 13 Jan 2026 10:03:01 +0100 Subject: [PATCH 08/33] delete test shading_no_effort --- tests/test_plot_utils.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index 37c2243..49aff37 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -112,35 +112,3 @@ def test_lists_and_strings_combined(): assert result == expected -def test_shade_no_effort_from_recording_planning(recording_planning_config): - """shade_no_effort shades contiguous zero-effort periods.""" - - def count_contiguous_zero_segments(effort: Series) -> int: - """Return number of contiguous zero-effort segments.""" - is_zero = effort == 0 - return ((is_zero != is_zero.shift(fill_value=False)) & is_zero).sum() - - recording_period = RecordingPeriod.from_path( - config=recording_planning_config, - bin_size=frequencies.to_offset("1W"), - ) - - counts = recording_period.counts - - bin_starts = to_datetime( - [interval.left for interval in counts.index], - ) - - fig, ax = plt.subplots() - ax.axvspan = MagicMock() - - shade_no_effort( - ax=ax, - bin_starts=bin_starts, - observed=recording_period, - bar_width=Timedelta("7D"), - ) - - expected_spans = count_contiguous_zero_segments(counts) - - assert ax.axvspan.call_count == expected_spans From 2349713661ee44fd2bfa84e2a5a7ac7ff1d051d2 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Tue, 13 Jan 2026 10:04:58 +0100 Subject: [PATCH 09/33] fix ruff --- src/post_processing/dataclass/recording_period.py | 15 ++++++++------- tests/test_plot_utils.py | 7 ------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 11524e6..32319fd 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -106,16 +106,16 @@ def from_path( df[col] = 
pd.to_datetime(df[col], utc=True).dt.tz_convert(None) # 3. Compute effective recording intervals (intersection) - df["start_recording"] = df[ + df["effective_start_recording"] = df[ ["start_recording", "start_deployment"] ].max(axis=1) - df["end_recording"] = df[ + df["effective_end_recording"] = df[ ["end_recording", "end_deployment"] ].min(axis=1) # Remove rows with no actual recording interval - df = df.loc[df["start_recording"] < df["end_recording"]].copy() + df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy() if df.empty: raise ValueError("No valid recording intervals after deployment intersection.") @@ -123,8 +123,8 @@ def from_path( # 4. Build fine-grained timeline at `timebin_origin` resolution origin = config.timebin_origin time_index = pd.date_range( - start=df["start_recording"].min(), - end=df["end_recording"].max(), + start=df["effective_start_recording"].min(), + end=df["effective_end_recording"].max(), freq=origin, ) @@ -134,8 +134,8 @@ def from_path( # 5. Vectorized interval coverage tvals = time_index.values[:, None] - start_vals = df["start_recording"].values - end_vals = df["end_recording"].values + start_vals = df["effective_start_recording"].values + end_vals = df["effective_end_recording"].values # Boolean matrix: True if timestamp is within any recording interval covered = (tvals >= start_vals) & (tvals < end_vals) @@ -151,4 +151,5 @@ def from_path( freq=bin_size, closed="left", ) + return cls(counts=counts, timebin_origin=origin) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index 49aff37..ffabbbd 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -1,19 +1,14 @@ -from unittest.mock import MagicMock import matplotlib.pyplot as plt import pytest from matplotlib.ticker import PercentFormatter from numpy import arange, testing -from pandas import Series, Timedelta, to_datetime -from pandas.tseries import frequencies -from post_processing.dataclass.recording_period import RecordingPeriod from post_processing.utils.plot_utils import ( _wrap_xtick_labels, get_legend, overview, set_y_axis_to_percentage, - shade_no_effort, ) @@ -110,5 +105,3 @@ def test_lists_and_strings_combined(): result = get_legend(annotators, labels) expected = ["Alice\nLabel1", "Bob\nLabel2"] assert result == expected - - From 77f71d1872e056186a071652056d980fcfeb5739 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Tue, 13 Jan 2026 10:34:28 +0100 Subject: [PATCH 10/33] delete matlab boring files --- .../PAMGuardMatlab-main/.DS_Store | Bin 6148 -> 0 bytes .../PAMGuardMatlab-main/.MATLABDriveTag | 1 - .../PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag | 1 - .../pgmatlab/Array/.MATLABDriveTag | 1 - 4 files changed, 3 deletions(-) delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag delete mode 100644 src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.DS_Store deleted file mode 100644 index 
8cbbca90b6e5166f54b08e62aaab675fe94f04f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z-O8-BN@c6nb3nTCkQQl)(EZ>xnykjo@uf;KKT0MGoe+f+2zj`Rl2EONYMF$o&h?B#G(=p{2Emc4#&vuTKv)3b}~@k=~Q)SDrb1MganEEez%%1&Lc-Xe)rG6i3eSHu$% z1H=F^Kn&~;1NICM&HX8vDkTPpfgdt}`-6mr=vvGT>a7Dlczs5H4G{%&d`looi>}4o zAb3EyNd+{i+&(e5Ne8>MajwPOph;(3&kW<(nTy8@*Rz9N>Tt$ggY+c^h=FYes;2AU z`F{n!Oz9(kJB51005R~-7~r*$HyT1w_H5lL56@Z&?FkwR#^tDhfWCDJfDY~>E$uXZ bi8{o&7IT9*3fgr#AYBABA@m^zeu056q9RP< diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag deleted file mode 100644 index 84059a2..0000000 --- a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/.MATLABDriveTag +++ /dev/null @@ -1 +0,0 @@ -3496f669-9381-4974-bb7c-5cc1ddcb05d4 \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag deleted file mode 100644 index df9fcd4..0000000 --- a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/.MATLABDriveTag +++ /dev/null @@ -1 +0,0 @@ -ee53bc03-ef5e-44bc-aea4-8fae1e2a0b9f \ No newline at end of file diff --git a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag b/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag deleted file mode 100644 index 656b51a..0000000 --- a/src/post_processing/utils/matlab_utils/PAMGuardMatlab-main/PAMGuardMatlab-main/pgmatlab/Array/.MATLABDriveTag +++ /dev/null @@ -1 +0,0 @@ -c4dd0a9d-e15d-496f-91ff-d9ff561a4fa0 \ No newline at end of file From e8c73e074eff7de6719cc3150c867865ee2ff775 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:15:46 +0100 Subject: [PATCH 11/33] few modifs --- .../dataclass/detection_filter.py | 10 ++- .../dataclass/recording_period.py | 62 ++++++++++--------- src/post_processing/utils/filtering_utils.py | 22 ++++--- src/post_processing/utils/plot_utils.py | 2 +- 4 files changed, 55 insertions(+), 41 deletions(-) diff --git a/src/post_processing/dataclass/detection_filter.py b/src/post_processing/dataclass/detection_filter.py index d636c4c..b2d2599 100644 --- a/src/post_processing/dataclass/detection_filter.py +++ b/src/post_processing/dataclass/detection_filter.py @@ -7,9 +7,9 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, fields from pathlib import Path -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, Any import yaml from pandas import Timedelta, Timestamp @@ -44,6 +44,12 @@ class DetectionFilter: box: bool = False filename_format: str = None + def __getitem__(self, key: str): + """Return the value of the given key.""" + if key in {f.name for f in fields(self)}: + return getattr(self, key) + raise KeyError(key) + @classmethod def from_yaml( cls, diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 32319fd..637733e 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -8,10 +8,13 @@ from dataclasses import 
dataclass from typing import TYPE_CHECKING -import pandas as pd from pandas import ( Series, Timedelta, + date_range, + interval_range, + read_csv, + to_datetime, ) from post_processing.utils.filtering_utils import ( @@ -21,6 +24,8 @@ if TYPE_CHECKING: from pandas.tseries.offsets import BaseOffset + from post_processing.dataclass.detection_filter import DetectionFilter + @dataclass(frozen=True) class RecordingPeriod: @@ -32,17 +37,17 @@ class RecordingPeriod: @classmethod def from_path( cls, - config, + config: DetectionFilter, *, bin_size: Timedelta | BaseOffset, ) -> RecordingPeriod: """Vectorized creation of recording coverage from CSV with start/end datetimes. This method reads a CSV with columns: - - 'start_recording' - - 'end_recording' - - 'start_deployment' - - 'end_deployment' + - "start_recording" + - "end_recording" + - "start_deployment" + - "end_deployment" It computes the **effective recording interval** as the intersection between recording and deployment periods, builds a fine-grained timeline at @@ -55,7 +60,7 @@ def from_path( - `timestamp_file`: path to CSV - `timebin_origin`: Timedelta resolution of detections bin_size : Timedelta or BaseOffset - Size of the aggregation bin (e.g., pd.Timedelta("1H") or "1D"). + Size of the aggregation bin (e.g., Timedelta("1H") or "1D"). Returns ------- @@ -64,10 +69,10 @@ def from_path( `timebin_origin`. """ - # 1. Read CSV and parse datetime columns + # Read CSV and parse datetime columns timestamp_file = config.timestamp_file delim = find_delimiter(timestamp_file) - df = pd.read_csv( + df = read_csv( config.timestamp_file, parse_dates=[ "start_recording", @@ -79,7 +84,8 @@ def from_path( ) if df.empty: - raise ValueError("CSV is empty.") + msg = "CSV is empty." + raise ValueError(msg) # Ensure all required columns are present required_columns = { @@ -92,20 +98,19 @@ def from_path( missing = required_columns - set(df.columns) if missing: - raise ValueError( - f"CSV is missing required columns: {', '.join(sorted(missing))}", - ) + msg = f"CSV is missing required columns: {', '.join(sorted(missing))}" + raise ValueError(msg) - # 2. Normalize timezones: convert to UTC, then remove tz info (naive) + # Normalize timezones: convert to UTC, then remove tz info (naive) for col in [ "start_recording", "end_recording", "start_deployment", "end_deployment", ]: - df[col] = pd.to_datetime(df[col], utc=True).dt.tz_convert(None) + df[col] = to_datetime(df[col], utc=True).dt.tz_convert(None) - # 3. Compute effective recording intervals (intersection) + # Compute effective recording intervals (intersection) df["effective_start_recording"] = df[ ["start_recording", "start_deployment"] ].max(axis=1) @@ -118,11 +123,12 @@ def from_path( df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy() if df.empty: - raise ValueError("No valid recording intervals after deployment intersection.") + msg = "No valid recording intervals after deployment intersection." + raise ValueError(msg) - # 4. 
Build fine-grained timeline at `timebin_origin` resolution + # Build fine-grained timeline at `timebin_origin` resolution origin = config.timebin_origin - time_index = pd.date_range( + time_index = date_range( start=df["effective_start_recording"].min(), end=df["effective_end_recording"].max(), freq=origin, @@ -130,22 +136,22 @@ def from_path( # Initialize effort vector (0 = no recording, 1 = recording) # Compare each timestamp to all intervals in a vectorized manner - effort = pd.Series(0, index=time_index) + effort = Series(0, index=time_index) - # 5. Vectorized interval coverage - tvals = time_index.values[:, None] - start_vals = df["effective_start_recording"].values - end_vals = df["effective_end_recording"].values + # Vectorized interval coverage + t_vals = time_index.to_numpy()[:, None] + start_vals = df["effective_start_recording"].to_numpy() + end_vals = df["effective_end_recording"].to_numpy() - # Boolean matrix: True if timestamp is within any recording interval - covered = (tvals >= start_vals) & (tvals < end_vals) + # Boolean matrix: True if the timestamp is within any recording interval + covered = (t_vals >= start_vals) & (t_vals < end_vals) effort[:] = covered.any(axis=1).astype(int) - # 6. Aggregate effort into user-defined bin_size + # Aggregate effort into user-defined bin_size counts = effort.resample(bin_size).sum() # Replace index with IntervalIndex for downstream compatibility - counts.index = pd.interval_range( + counts.index = interval_range( start=counts.index[0], periods=len(counts), freq=bin_size, diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index 650b5a8..faf2fd7 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING import pytz +from osekit.utils.timestamp_utils import strptime_from_text from pandas import ( DataFrame, Timedelta, @@ -570,16 +571,17 @@ def get_filename_timestamps(df: DataFrame, date_parser: str) -> list[Timestamp]: """ tz = get_timezone(df) - try: - return [ - to_datetime( - ts, - format=date_parser, - ).tz_localize(tz) for ts in df["filename"] - ] - except ValueError: - msg = """Could not parse timestamps from `df["filename"]`.""" - raise ValueError(msg) from None + timestamps = [ + strptime_from_text( + ts, + datetime_template=date_parser, + ) for ts in df["filename"] + ] + + if all(t.tz is None for t in timestamps): + timestamps = [t.tz_localize(tz) for t in timestamps] + + return timestamps def ensure_in_list(value: str, candidates: list[str], label: str) -> None: diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 75c421f..f1cd07f 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -691,7 +691,7 @@ def shade_no_effort( start, start + width_days, facecolor="0.65", - alpha=0.08, + alpha=0.1, linewidth=0, zorder=0, ) From fd913cfa38f284f4be6ebbb0f6265ba91434490d Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:30:07 +0100 Subject: [PATCH 12/33] effort legend --- src/post_processing/utils/plot_utils.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index f1cd07f..f5ad0df 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -11,6 +11,7 @@ import 
numpy as np from matplotlib import dates as mdates from matplotlib.dates import num2date +from matplotlib.patches import Patch from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit from pandas import ( @@ -114,9 +115,6 @@ def histo( else: legend_labels = None - # if effort: - # normalize_counts_by_effort(df, effort, time_bin) - n_groups = len(labels) if legend_labels else 1 bar_width = bin_size / n_groups bin_starts = mdates.date2num(df.index) @@ -684,6 +682,7 @@ def shade_no_effort( no_effort = effort_fraction == 0 partial_effort = (effort_fraction > 0) & (effort_fraction < 1) + # Draw partial effort first (lighter) for ts in bin_starts[partial_effort]: start = mdates.date2num(ts - bar_width) @@ -694,6 +693,7 @@ def shade_no_effort( alpha=0.1, linewidth=0, zorder=0, + label="partial data", ) # Draw no effort on top (darker) @@ -706,8 +706,24 @@ def shade_no_effort( alpha=0.15, linewidth=0, zorder=0, + label="no data", + ) + + handles = [] + + if partial_effort.any(): + handles.append( + Patch(facecolor="0.65", alpha=0.1, label="partial data") + ) + + if no_effort.any(): + handles.append( + Patch(facecolor="0.45", alpha=0.15, label="no data") ) + if handles: + ax.legend(handles=handles) + def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: """Display sunrise/sunset times on plot.""" From df40ec9ccf440425c60221f6d9fce35b21f3b74c Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Wed, 14 Jan 2026 16:36:38 +0100 Subject: [PATCH 13/33] ruff fix --- src/post_processing/dataclass/detection_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/post_processing/dataclass/detection_filter.py b/src/post_processing/dataclass/detection_filter.py index b2d2599..b28c023 100644 --- a/src/post_processing/dataclass/detection_filter.py +++ b/src/post_processing/dataclass/detection_filter.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, fields from pathlib import Path -from typing import TYPE_CHECKING, Literal, Any +from typing import TYPE_CHECKING, Literal import yaml from pandas import Timedelta, Timestamp From 0c74f8c40e373122fd6ed374581951fdbf17771a Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:26:42 +0100 Subject: [PATCH 14/33] better shade handling --- src/post_processing/utils/core_utils.py | 13 ++++--- src/post_processing/utils/plot_utils.py | 50 ++++++++++++++----------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 9457ef2..32c5943 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -305,10 +305,10 @@ def add_season_period( ) season_colors = { - "winter": "#2ce5e3", - "spring": "#4fcf50", - "summer": "#ffcf50", - "autumn": "#fb9a67", + "winter": "#84eceb", + "spring": "#91de92", + "summer": "#fce097", + "autumn": "#f9c1a5", } bin_centers = [ @@ -329,8 +329,9 @@ def add_season_period( width=(bins[i + 1] - bins[i]), color=season_colors[season], align="center", - zorder=0, - alpha=0.6, + zorder=6, + alpha=1, + linewidth=0, ) ax.set_ylim(ax.dataLim.ymin, ax.dataLim.ymax) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index f5ad0df..2a22914 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -149,11 +149,6 @@ def histo( set_plot_title(ax, 
annotators, labels) ax.set_xlim(begin, end) - if season: - if lat is None or lon is None: - get_coordinates() - add_season_period(ax, northern=lat >= 0) - if effort: shade_no_effort( ax=ax, @@ -162,6 +157,11 @@ def histo( bar_width=bin_size, ) + if season: + if lat is None or lon is None: + get_coordinates() + add_season_period(ax, northern=lat >= 0) + def _prepare_timeline_plot( df: DataFrame, @@ -675,54 +675,62 @@ def shade_no_effort( index=[i.left for i in observed.counts.index], ).tz_localize("UTC") + effort_by_end = Series( + observed.counts.values, + index=[i.left for i in observed.counts.index], + ).tz_localize("UTC") + # Align effort to plotting bins - effort_aligned = effort_by_start.reindex(bin_starts) + effort_aligned_start = effort_by_start.reindex(bin_starts).fillna(0) + effort_aligned_end = effort_by_end.reindex(bin_starts + bar_width).fillna(0) + combined_effort = .5 * effort_aligned_start.add(effort_aligned_end, fill_value=0) + max_effort = bar_width / observed.timebin_origin - effort_fraction = effort_aligned / max_effort + effort_fraction = combined_effort / max_effort - no_effort = effort_fraction == 0 - partial_effort = (effort_fraction > 0) & (effort_fraction < 1) + no_effort = effort_fraction[effort_fraction == 0] + partial_effort = effort_fraction[(effort_fraction > 0) & (effort_fraction < 1)] # Draw partial effort first (lighter) - for ts in bin_starts[partial_effort]: + for ts in partial_effort.index: start = mdates.date2num(ts - bar_width) ax.axvspan( start, start + width_days, facecolor="0.65", - alpha=0.1, + alpha=.1, linewidth=0, - zorder=0, + zorder=3, label="partial data", ) # Draw no effort on top (darker) - for ts in bin_starts[no_effort]: + for ts in no_effort.index: start = mdates.date2num(ts - bar_width) ax.axvspan( start, start + width_days, facecolor="0.45", - alpha=0.15, + alpha=.15, linewidth=0, - zorder=0, + zorder=3, label="no data", ) handles = [] - - if partial_effort.any(): + if len(partial_effort) > 0: handles.append( Patch(facecolor="0.65", alpha=0.1, label="partial data") ) - - if no_effort.any(): + if len(no_effort) > 0: handles.append( Patch(facecolor="0.45", alpha=0.15, label="no data") ) - if handles: - ax.legend(handles=handles) + ax.legend( + handles=handles, + loc="best", + ) def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: From ca539cb63478f265142f23a7e517ff9a3baadedf Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:40:32 +0100 Subject: [PATCH 15/33] histo legend fix --- src/post_processing/utils/plot_utils.py | 60 ++++++++++++++----------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 2a22914..4b268ee 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -133,19 +133,17 @@ def histo( bar_kwargs["label"] = legend_labels[i] ax.bar(bin_starts + offset, df.iloc[:, i], **bar_kwargs) - if kwargs.get("show_recording_OFF"): - ax.set_facecolor("lightgrey") if len(df.columns) > 1 and legend: - ax.legend(labels=legend_labels, bbox_to_anchor=(1.01, 1), loc="upper left") + legend_histo = ax.legend( + labels=legend_labels, + bbox_to_anchor=(1.01, 1), + loc="upper left", + ) + ax.add_artist(legend_histo) - y_label = ( - f"Detections{(' normalized by effort' if effort else '')}" - f"\n(detections: {timedelta_to_str(time_bin)}" - f" - bin size: {bin_size_str})" - ) - ax.set_ylabel(y_label) - # 
set_y_axis_to_percentage(ax) if effort else set_dynamic_ylim(ax, df) + ax.set_ylabel(f"Detections ({timedelta_to_str(time_bin)})") + ax.set_xlabel(f"Bin size ({bin_size_str})") set_plot_title(ax, annotators, labels) ax.set_xlim(begin, end) @@ -613,12 +611,15 @@ def get_bin_size_str(bin_size: Timedelta | BaseOffset) -> str: return str(bin_size.n) + bin_size.freqstr -def set_y_axis_to_percentage( - ax: plt.Axes, -) -> None: +def set_y_axis_to_percentage(ax: plt.Axes, max_val: float) -> None: """Set y-axis to percentage.""" - ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0)) - ax.set_yticks(np.arange(0, 1.02, 0.2)) + ax.yaxis.set_major_formatter( + plt.FuncFormatter(lambda y, _: f"{(y / max_val) * 100:.0f}%") + ) + + current_label = ax.get_ylabel() + if current_label and "%" not in current_label: + ax.set_ylabel(f"{current_label} (%)") def set_dynamic_ylim(ax: plt.Axes, @@ -691,6 +692,9 @@ def shade_no_effort( no_effort = effort_fraction[effort_fraction == 0] partial_effort = effort_fraction[(effort_fraction > 0) & (effort_fraction < 1)] + # Get legend handle + handles1, labels1 = ax.get_legend_handles_labels() + # Draw partial effort first (lighter) for ts in partial_effort.index: start = mdates.date2num(ts - bar_width) @@ -698,7 +702,7 @@ def shade_no_effort( start, start + width_days, facecolor="0.65", - alpha=.1, + alpha=0.1, linewidth=0, zorder=3, label="partial data", @@ -711,25 +715,31 @@ def shade_no_effort( start, start + width_days, facecolor="0.45", - alpha=.15, + alpha=0.15, linewidth=0, zorder=3, label="no data", ) - handles = [] + handles_effort = [] if len(partial_effort) > 0: - handles.append( - Patch(facecolor="0.65", alpha=0.1, label="partial data") - ) + handles_effort.append(Patch(facecolor="0.65", alpha=0.1, label="partial data")) if len(no_effort) > 0: - handles.append( + handles_effort.append( Patch(facecolor="0.45", alpha=0.15, label="no data") ) - if handles: + if handles_effort: + + labels_effort = [h.get_label() for h in handles_effort] + + handles = handles1 + handles_effort + labels = labels1 + labels_effort + ax.legend( - handles=handles, - loc="best", + handles, + labels, + bbox_to_anchor=(1.01, 1), + loc="upper left", ) From 360804d798b71188953a91aa52fdd5368ab04aa7 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:41:38 +0100 Subject: [PATCH 16/33] histo legend fix --- src/post_processing/utils/plot_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 4b268ee..eaf3301 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -721,6 +721,7 @@ def shade_no_effort( label="no data", ) + # Add effort legend to current plot legend handles_effort = [] if len(partial_effort) > 0: handles_effort.append(Patch(facecolor="0.65", alpha=0.1, label="partial data")) @@ -729,12 +730,9 @@ def shade_no_effort( Patch(facecolor="0.45", alpha=0.15, label="no data") ) if handles_effort: - labels_effort = [h.get_label() for h in handles_effort] - handles = handles1 + handles_effort labels = labels1 + labels_effort - ax.legend( handles, labels, From ec8deeccc2753c8484435cbdf8f08a688226e647 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:45:32 +0100 Subject: [PATCH 17/33] ruff fix --- src/post_processing/utils/plot_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index eaf3301..1e83042 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -12,7 +12,6 @@ from matplotlib import dates as mdates from matplotlib.dates import num2date from matplotlib.patches import Patch -from matplotlib.ticker import PercentFormatter from numpy import ceil, histogram, polyfit from pandas import ( DataFrame, From b7beb53f10d5d7a5558d0a76e337724dddad309b Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Fri, 16 Jan 2026 11:59:43 +0100 Subject: [PATCH 18/33] shade effort on scatter plot --- src/post_processing/dataclass/data_aplose.py | 19 +++++++++++-------- src/post_processing/utils/core_utils.py | 15 +++++++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index 3c8d1e4..e1d3cae 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -416,18 +416,20 @@ def plot( season = kwargs.get("season", False) bin_size = kwargs.get("bin_size") - return heatmap(df=df_filtered, - ax=ax, - bin_size=bin_size, - time_range=time, - show_rise_set=show_rise_set, - season=season, - coordinates=self.coordinates, - ) + return heatmap( + df=df_filtered, + ax=ax, + bin_size=bin_size, + time_range=time, + show_rise_set=show_rise_set, + season=season, + coordinates=self.coordinates, + ) if mode == "scatter": show_rise_set = kwargs.get("show_rise_set", True) season = kwargs.get("season", False) + effort = kwargs.get("effort") return scatter(df=df_filtered, ax=ax, @@ -435,6 +437,7 @@ def plot( show_rise_set=show_rise_set, season=season, coordinates=self.coordinates, + effort=effort, ) if mode == "agreement": diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 32c5943..b1df6bd 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -303,6 +303,10 @@ def add_season_period( start=Timestamp(ax.get_xlim()[0], unit="D").floor("1D"), end=Timestamp(ax.get_xlim()[1], unit="D").ceil("1D"), ) + bins = date_range( + start=Timestamp(ax.get_xlim()[0], unit="D"), + end=Timestamp(ax.get_xlim()[1], unit="D"), + ) season_colors = { "winter": "#84eceb", @@ -329,7 +333,7 @@ def add_season_period( width=(bins[i + 1] - bins[i]), color=season_colors[season], align="center", - zorder=6, + zorder=2, alpha=1, linewidth=0, ) @@ -520,8 +524,9 @@ def round_begin_end_timestamps( bin_size: Timedelta | BaseOffset, ) -> tuple[Timestamp, Timestamp, Timedelta]: """Return time vector given a bin size.""" - if (not isinstance(timestamp_list, list) or - not all(isinstance(ts, Timestamp) for ts in timestamp_list)): + if not isinstance(timestamp_list, list) or not all( + isinstance(ts, Timestamp) for ts in timestamp_list + ): msg = "timestamp_list must be a list[Timestamp]" raise TypeError(msg) @@ -535,6 +540,7 @@ def round_begin_end_timestamps( return start, end, bin_size if isinstance(bin_size, BaseOffset): + bin_size = offsets.Week(n=bin_size.n, weekday=timestamp_list[0].weekday()) start = bin_size.rollback(min(timestamp_list)) end = bin_size.rollforward(max(timestamp_list)) if not isinstance(bin_size, (offsets.Hour, offsets.Minute, offsets.Second)): @@ -545,7 +551,8 @@ def round_begin_end_timestamps( timestamp_range = date_range(start=start, end=end, freq=bin_size) bin_size = 
timestamp_range[1] - timestamp_range[0] - return start.floor(bin_size), end.ceil(bin_size), bin_size + # return start.floor(bin_size), end.ceil(bin_size), bin_size + return start, end, bin_size msg = "Could not get start/end timestamps." raise ValueError(msg) From 92d0a95767e371e1f730359f647baad71117fc02 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:00:54 +0100 Subject: [PATCH 19/33] better season mngmt on plots --- src/post_processing/utils/plot_utils.py | 46 +++++++++++-------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 1e83042..953d7db 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -20,6 +20,7 @@ Series, Timedelta, Timestamp, + concat, date_range, ) from pandas.tseries import frequencies @@ -134,12 +135,11 @@ def histo( ax.bar(bin_starts + offset, df.iloc[:, i], **bar_kwargs) if len(df.columns) > 1 and legend: - legend_histo = ax.legend( + ax.legend( labels=legend_labels, bbox_to_anchor=(1.01, 1), loc="upper left", ) - ax.add_artist(legend_histo) ax.set_ylabel(f"Detections ({timedelta_to_str(time_bin)})") ax.set_xlabel(f"Bin size ({bin_size_str})") @@ -149,9 +149,7 @@ def histo( if effort: shade_no_effort( ax=ax, - bin_starts=df.index, observed=effort, - bar_width=bin_size, ) if season: @@ -200,7 +198,6 @@ def _prepare_timeline_plot( ax.set_ylim(0, 24) ax.set_yticks(range(0, 25, 2)) ax.set_ylabel("Hour") - ax.set_xlabel("Date") ax.grid(color="k", linestyle="-", linewidth=0.2) set_plot_title(ax=ax, annotators=annotators, labels=labels) @@ -217,7 +214,7 @@ def scatter( df: DataFrame, ax: Axes, time_range: DatetimeIndex, - **kwargs: bool | tuple[float, float], + **kwargs: bool | tuple[float, float] | RecordingPeriod, ) -> None: """Scatter-plot of detections for a given annotator and label. @@ -241,6 +238,7 @@ def scatter( show_rise_set = kwargs.get("show_rise_set", False) season = kwargs.get("season", False) coordinates = kwargs.get("coordinates", False) + effort = kwargs.get("effort", False) _prepare_timeline_plot( df=df, @@ -280,6 +278,12 @@ def scatter( framealpha=0.6, ) + if effort: + shade_no_effort( + ax=ax, + observed=effort, + ) + def heatmap(df: DataFrame, ax: Axes, @@ -646,9 +650,7 @@ def set_plot_title(ax: plt.Axes, annotators: list[str], labels: list[str]) -> No def shade_no_effort( ax: plt.Axes, - bin_starts: Index, observed: RecordingPeriod, - bar_width: Timedelta, ) -> None: """Shade areas of the plot where no observation effort was made. @@ -656,38 +658,32 @@ def shade_no_effort( ---------- ax : plt.Axes The axes on which to draw the shaded regions. - bin_starts : Index - A datetime index representing the start times of each bin. observed : RecordingPeriod A Series with observation counts or flags, indexed by datetime. Should be aligned or re-indexable to `bin_starts`. - bar_width : Timedelta - Width of each time bin. Used to compute the span of the shaded areas. 
- """ - """Shade areas of the plot where no observation effort was made.""" - width_days = bar_width.total_seconds() / 86400 - # Convert effort IntervalIndex → DatetimeIndex (bin starts) effort_by_start = Series( observed.counts.values, index=[i.left for i in observed.counts.index], - ).tz_localize("UTC") - + ) effort_by_end = Series( observed.counts.values, index=[i.left for i in observed.counts.index], - ).tz_localize("UTC") + ) + combined_effort = .5 * effort_by_start.add(effort_by_end, fill_value=0) - # Align effort to plotting bins - effort_aligned_start = effort_by_start.reindex(bin_starts).fillna(0) - effort_aligned_end = effort_by_end.reindex(bin_starts + bar_width).fillna(0) - combined_effort = .5 * effort_aligned_start.add(effort_aligned_end, fill_value=0) + bar_width = effort_by_start.index[1] - effort_by_start.index[0] + width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin effort_fraction = combined_effort / max_effort + first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) + last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) + effort_fraction = concat([first_elem, effort_fraction, last_elem]) + no_effort = effort_fraction[effort_fraction == 0] partial_effort = effort_fraction[(effort_fraction > 0) & (effort_fraction < 1)] @@ -703,7 +699,7 @@ def shade_no_effort( facecolor="0.65", alpha=0.1, linewidth=0, - zorder=3, + zorder=1, label="partial data", ) @@ -716,7 +712,7 @@ def shade_no_effort( facecolor="0.45", alpha=0.15, linewidth=0, - zorder=3, + zorder=1, label="no data", ) From 552c7297acbee8701662617922041c9040bdba92 Mon Sep 17 00:00:00 2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:58:29 +0100 Subject: [PATCH 20/33] differenciate LT et ST data plots --- src/post_processing/utils/core_utils.py | 8 ++------ src/post_processing/utils/plot_utils.py | 1 - tests/test_plot_utils.py | 17 ++++++++--------- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index b1df6bd..a1ae91f 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -299,10 +299,6 @@ def add_season_period( msg = "Axes have no data" raise ValueError(msg) - bins = date_range( - start=Timestamp(ax.get_xlim()[0], unit="D").floor("1D"), - end=Timestamp(ax.get_xlim()[1], unit="D").ceil("1D"), - ) bins = date_range( start=Timestamp(ax.get_xlim()[0], unit="D"), end=Timestamp(ax.get_xlim()[1], unit="D"), @@ -540,7 +536,6 @@ def round_begin_end_timestamps( return start, end, bin_size if isinstance(bin_size, BaseOffset): - bin_size = offsets.Week(n=bin_size.n, weekday=timestamp_list[0].weekday()) start = bin_size.rollback(min(timestamp_list)) end = bin_size.rollforward(max(timestamp_list)) if not isinstance(bin_size, (offsets.Hour, offsets.Minute, offsets.Second)): @@ -551,7 +546,8 @@ def round_begin_end_timestamps( timestamp_range = date_range(start=start, end=end, freq=bin_size) bin_size = timestamp_range[1] - timestamp_range[0] - # return start.floor(bin_size), end.ceil(bin_size), bin_size + if bin_size.resolution_string in ["s", "min", "h"]: + return start.floor(bin_size), end.ceil(bin_size), bin_size return start, end, bin_size msg = "Could not get start/end timestamps." 
diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 953d7db..9c863f4 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -16,7 +16,6 @@ from pandas import ( DataFrame, DatetimeIndex, - Index, Series, Timedelta, Timestamp, diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index ffabbbd..4a7e9cc 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -58,16 +58,15 @@ def test_wrap_xtick_labels_no_spaces(): assert wrapped_labels[0] == expected -def test_y_axis_formatter_and_ticks(): +def test_set_y_axis_to_percentage(): fig, ax = plt.subplots() - - set_y_axis_to_percentage(ax) - - assert isinstance(ax.yaxis.get_major_formatter(), PercentFormatter) - assert ax.yaxis.get_major_formatter().xmax == 1.0 - - expected_ticks = arange(0, 1.02, 0.2) - testing.assert_allclose(ax.get_yticks(), expected_ticks) + ax.set_ylabel("Accuracy") + set_y_axis_to_percentage(ax, max_val=200) + formatter = ax.yaxis.get_major_formatter() + assert formatter(100, None) == "50%" + assert formatter(200, None) == "100%" + assert ax.get_ylabel() == "Accuracy (%)" + plt.close(fig) def test_single_annotator_multiple_labels(): From d0cda89ec0f61857fef98fc515780ae3bd758f20 Mon Sep 17 00:00:00 2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 12:00:21 +0100 Subject: [PATCH 21/33] test plot utils import fix --- tests/test_plot_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_plot_utils.py b/tests/test_plot_utils.py index 4a7e9cc..4306c38 100644 --- a/tests/test_plot_utils.py +++ b/tests/test_plot_utils.py @@ -1,8 +1,5 @@ - import matplotlib.pyplot as plt import pytest -from matplotlib.ticker import PercentFormatter -from numpy import arange, testing from post_processing.utils.plot_utils import ( _wrap_xtick_labels, From 4b9a2f1b43bec08e65307fc6e71d0a18522da80f Mon Sep 17 00:00:00 2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:51:42 +0100 Subject: [PATCH 22/33] ruff simplification --- src/post_processing/utils/plot_utils.py | 114 +++++++++++++----------- 1 file changed, 60 insertions(+), 54 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 9c863f4..eaee63c 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -366,7 +366,7 @@ def heatmap(df: DataFrame, ) if coordinates and season: - lat, lon = coordinates + lat, _ = coordinates add_season_period(ax, northern=lat >= 0) bin_size_str = get_bin_size_str(bin_size) @@ -494,41 +494,33 @@ def agreement( """ labels, annotators = get_labels_and_annotators(df) - datetimes1 = list( - df[(df["annotator"] == annotators[0]) & (df["annotation"] == labels[0])][ - "start_datetime" - ], - ) - datetimes2 = list( - df[(df["annotator"] == annotators[1]) & (df["annotation"] == labels[1])][ - "start_datetime" - ], - ) + datetimes = [ + list( + df[ + (df["annotator"] == annotators[i]) & (df["annotation"] == labels[i]) + ]["start_datetime"], + ) + for i in range(2) + ] # scatter plot n_annot_max = bin_size.total_seconds() / df["end_time"].iloc[0] - start = df["start_datetime"].min() - stop = df["start_datetime"].max() - freq = ( bin_size if isinstance(bin_size, Timedelta) else str(bin_size.n) + bin_size.name ) bins = date_range( - start=start.floor(bin_size), - end=stop.ceil(bin_size), + start=df["start_datetime"].min().floor(bin_size), + 
end=df["start_datetime"].max().ceil(bin_size), freq=freq, ) - hist1, _ = histogram(datetimes1, bins=bins) - hist2, _ = histogram(datetimes2, bins=bins) - df_hist = ( DataFrame( { - annotators[0]: hist1, - annotators[1]: hist2, + annotators[0]: histogram(datetimes[0], bins=bins)[0], + annotators[1]: histogram(datetimes[1], bins=bins)[0], }, ) / n_annot_max @@ -616,7 +608,7 @@ def get_bin_size_str(bin_size: Timedelta | BaseOffset) -> str: def set_y_axis_to_percentage(ax: plt.Axes, max_val: float) -> None: """Set y-axis to percentage.""" ax.yaxis.set_major_formatter( - plt.FuncFormatter(lambda y, _: f"{(y / max_val) * 100:.0f}%") + plt.FuncFormatter(lambda y, _: f"{(y / max_val) * 100:.0f}%"), ) current_label = ax.get_ylabel() @@ -667,17 +659,12 @@ def shade_no_effort( observed.counts.values, index=[i.left for i in observed.counts.index], ) - effort_by_end = Series( - observed.counts.values, - index=[i.left for i in observed.counts.index], - ) - combined_effort = .5 * effort_by_start.add(effort_by_end, fill_value=0) bar_width = effort_by_start.index[1] - effort_by_start.index[0] width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin - effort_fraction = combined_effort / max_effort + effort_fraction = effort_by_start / max_effort first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) @@ -689,39 +676,35 @@ def shade_no_effort( # Get legend handle handles1, labels1 = ax.get_legend_handles_labels() - # Draw partial effort first (lighter) - for ts in partial_effort.index: - start = mdates.date2num(ts - bar_width) - ax.axvspan( - start, - start + width_days, - facecolor="0.65", - alpha=0.1, - linewidth=0, - zorder=1, - label="partial data", - ) + _draw_effort_spans( + ax=ax, + effort_index=partial_effort.index, + bar_width=bar_width, + width_days=width_days, + facecolor="0.65", + alpha=0.1, + label="partial data", + ) - # Draw no effort on top (darker) - for ts in no_effort.index: - start = mdates.date2num(ts - bar_width) - ax.axvspan( - start, - start + width_days, - facecolor="0.45", - alpha=0.15, - linewidth=0, - zorder=1, - label="no data", - ) + _draw_effort_spans( + ax=ax, + effort_index=no_effort.index, + bar_width=bar_width, + width_days=width_days, + facecolor="0.45", + alpha=0.15, + label="no data", + ) # Add effort legend to current plot legend handles_effort = [] if len(partial_effort) > 0: - handles_effort.append(Patch(facecolor="0.65", alpha=0.1, label="partial data")) + handles_effort.append( + Patch(facecolor="0.65", alpha=0.1, label="partial data"), + ) if len(no_effort) > 0: handles_effort.append( - Patch(facecolor="0.45", alpha=0.15, label="no data") + Patch(facecolor="0.45", alpha=0.15, label="no data"), ) if handles_effort: labels_effort = [h.get_label() for h in handles_effort] @@ -735,6 +718,29 @@ def shade_no_effort( ) +def _draw_effort_spans( + ax: plt.Axes, + effort_index: DatetimeIndex, + bar_width: Timedelta, + width_days: float, + facecolor: str, + alpha: float, + label: str, +) -> None: + """Draw vertical lines for effort plot.""" + for ts in effort_index: + start = mdates.date2num(ts - bar_width) + ax.axvspan( + start, + start + width_days, + facecolor=facecolor, + alpha=alpha, + linewidth=0, + zorder=1, + label=label, + ) + + def add_sunrise_sunset(ax: Axes, lat: float, lon: float, tz: tzinfo) -> None: """Display sunrise/sunset times on plot.""" x_min, x_max = ax.get_xlim() From bc647a664945e1ff403134b90b1a1ad8feb959b5 Mon Sep 17 00:00:00 
2001 From: mathieudpnt <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:56:50 +0100 Subject: [PATCH 23/33] draw effort positional argument --- src/post_processing/utils/plot_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index eaee63c..97733b1 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -723,6 +723,7 @@ def _draw_effort_spans( effort_index: DatetimeIndex, bar_width: Timedelta, width_days: float, + *, facecolor: str, alpha: float, label: str, From cb07568de99b52799987f5a6a7966095a1411dc9 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:27:11 +0100 Subject: [PATCH 24/33] syntax --- src/post_processing/utils/core_utils.py | 42 ++++++++++++++++--------- src/post_processing/utils/plot_utils.py | 12 +++---- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index a1ae91f..7951192 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -3,7 +3,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import astral import easygui @@ -35,7 +35,7 @@ ) if TYPE_CHECKING: - from datetime import tzinfo + from datetime import datetime, timedelta, tzinfo from pathlib import Path import matplotlib.pyplot as plt @@ -44,10 +44,10 @@ def get_season(ts: Timestamp, *, northern: bool = True) -> tuple[str, int]: """Determine the meteorological season from a Timestamp. - In the Northern hemisphere + In the Northern Hemisphere Winter: Dec-Feb, Spring: Mar-May, Summer: Jun-Aug, Autumn: Sep-Nov - In the Southern hemisphere + In the Southern Hemisphere Winter: Jun-Aug, Spring: Sep-Nov, Summer: Dec-Feb, Autumn: Mar-May Parameters @@ -133,8 +133,18 @@ def get_sun_times( dt_sunset = Timestamp(sunset(gps.observer, date=date)).tz_convert(tz) # Convert sunrise and sunset to decimal hours - h_sunrise.append(dt_sunrise.hour + dt_sunrise.minute / 60 + dt_sunrise.second / 3600 + dt_sunrise.microsecond / 3_600_000_000) - h_sunset.append(dt_sunset.hour + dt_sunset.minute / 60 + dt_sunset.second / 3600 + dt_sunset.microsecond / 3_600_000_000) + h_sunrise.append( + dt_sunrise.hour + + dt_sunrise.minute / 60 + + dt_sunrise.second / 3600 + + dt_sunrise.microsecond / 3_600_000_000, + ) + h_sunset.append( + dt_sunset.hour + + dt_sunset.minute / 60 + + dt_sunset.second / 3600 + + dt_sunset.microsecond / 3_600_000_000, + ) return h_sunrise, h_sunset @@ -215,8 +225,12 @@ def add_weak_detection( if not max_time: max_time = Timedelta(get_max_time(df), "s") - df["start_datetime"] = [strftime_osmose_format(start) for start in df["start_datetime"]] - df["end_datetime"] = [strftime_osmose_format(stop) for stop in df["end_datetime"]] + df["start_datetime"] = [ + strftime_osmose_format(start) for start in df["start_datetime"] + ] + df["end_datetime"] = [ + strftime_osmose_format(stop) for stop in df["end_datetime"] + ] for ant in annotators: for lbl in labels: @@ -259,12 +273,12 @@ def add_weak_detection( def json2df(json_path: Path) -> DataFrame: - """Convert a metadatax json file into a DataFrame. + """Convert a metadatax JSON file into a DataFrame. 
Parameters ---------- json_path: Path - Json file path + JSON file path """ with json_path.open(encoding="utf-8") as f: @@ -480,7 +494,7 @@ def get_labels_and_annotators(df: DataFrame) -> tuple[list, list]: def localize_timestamps(timestamps: list[Timestamp], tz: tzinfo) -> list[Timestamp]: - """Localize timestamps if necessary.""" + """Localise timestamps if necessary.""" localized = [] for ts in timestamps: if ts.tzinfo is None or ts.tzinfo.utcoffset(ts) is None: @@ -518,7 +532,7 @@ def get_time_range_and_bin_size( def round_begin_end_timestamps( timestamp_list: list[Timestamp], bin_size: Timedelta | BaseOffset, -) -> tuple[Timestamp, Timestamp, Timedelta]: +) -> tuple[Any, Any, Any]: """Return time vector given a bin size.""" if not isinstance(timestamp_list, list) or not all( isinstance(ts, Timestamp) for ts in timestamp_list @@ -546,7 +560,7 @@ def round_begin_end_timestamps( timestamp_range = date_range(start=start, end=end, freq=bin_size) bin_size = timestamp_range[1] - timestamp_range[0] - if bin_size.resolution_string in ["s", "min", "h"]: + if bin_size.resolution_string in {"s", "min", "h"}: return start.floor(bin_size), end.ceil(bin_size), bin_size return start, end, bin_size @@ -555,7 +569,7 @@ def round_begin_end_timestamps( def timedelta_to_str(td: Timedelta) -> str: - """From a Timedelta to corresponding string.""" + """From a Timedelta to the corresponding string.""" seconds = int(td.total_seconds()) if seconds % 86400 == 0: diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 97733b1..3c3a5cc 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -75,15 +75,15 @@ def histo( - legend: bool Whether to show the legend. - color: str | list[str] - Color or list of colors for the histogram bars. - If not provided, default colors will be used. + Colour or list of colours for the histogram bars. + If not provided, default colours will be used. - season: bool Whether to show the season. - coordinates: tuple[float, float] The coordinates of the plotted detections. - effort: RecordingPeriod Object corresponding to the observation effort. - If provided, data will be normalized by observation effort. + If provided, data will be normalised by observation effort. """ labels, annotators = zip(*[col.rsplit("-", 1) for col in df.columns], strict=False) @@ -473,7 +473,7 @@ def agreement( bin_size: Timedelta | BaseOffset, ax: plt.Axes, ) -> None: - """Compute and visualize agreement between two annotators. + """Compute and visualise agreement between two annotators. This function compares annotation timestamps from two annotators over a time range. It also fits and plots a linear regression line and displays the coefficient @@ -556,8 +556,8 @@ def timeline( Matplotlib axes object where the scatterplot and regression line will be drawn. **kwargs: Additional keyword arguments depending on the mode. - color: str | list[str] - Color or list of colors for the histogram bars. - If not provided, default colors will be used. + Colour or list of colours for the histogram bars. + If not provided, default colours will be used. 
""" color = kwargs.get("color") From 9d3188a1e0655b9ebb44a8c485df88ffd0b85f7e Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:35:58 +0100 Subject: [PATCH 25/33] hotfix partial/no data bars --- src/post_processing/utils/plot_utils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 3c3a5cc..c4f79fb 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -660,11 +660,18 @@ def shade_no_effort( index=[i.left for i in observed.counts.index], ) - bar_width = effort_by_start.index[1] - effort_by_start.index[0] + effort_by_end = Series( + observed.counts.values, + index=[i.right for i in observed.counts.index], + ) + + combined_effort = 0.5 * effort_by_start.add(effort_by_end, fill_value=0) + + bar_width = combined_effort.index[1] - combined_effort.index[0] width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin - effort_fraction = effort_by_start / max_effort + effort_fraction = combined_effort / max_effort first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) @@ -679,7 +686,6 @@ def shade_no_effort( _draw_effort_spans( ax=ax, effort_index=partial_effort.index, - bar_width=bar_width, width_days=width_days, facecolor="0.65", alpha=0.1, @@ -689,7 +695,6 @@ def shade_no_effort( _draw_effort_spans( ax=ax, effort_index=no_effort.index, - bar_width=bar_width, width_days=width_days, facecolor="0.45", alpha=0.15, @@ -721,7 +726,6 @@ def shade_no_effort( def _draw_effort_spans( ax: plt.Axes, effort_index: DatetimeIndex, - bar_width: Timedelta, width_days: float, *, facecolor: str, @@ -730,7 +734,7 @@ def _draw_effort_spans( ) -> None: """Draw vertical lines for effort plot.""" for ts in effort_index: - start = mdates.date2num(ts - bar_width) + start = mdates.date2num(ts) ax.axvspan( start, start + width_days, From 4c2fc0d62f66693ff6247172bb26eda8a57848f7 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:39:12 +0100 Subject: [PATCH 26/33] ruff import --- src/post_processing/utils/core_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index 7951192..c149155 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -35,7 +35,7 @@ ) if TYPE_CHECKING: - from datetime import datetime, timedelta, tzinfo + from datetime import tzinfo from pathlib import Path import matplotlib.pyplot as plt From c0e968f178caba05e4b8205c8b82f0b265b727e8 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:24:19 +0100 Subject: [PATCH 27/33] RecordingPeriod hotfix --- .../dataclass/recording_period.py | 20 +++++++++---------- src/post_processing/utils/plot_utils.py | 11 ++-------- tests/test_recording_period.py | 20 ++++++++++--------- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index 637733e..f19fab2 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -9,14 +9,15 @@ from typing import 
TYPE_CHECKING from pandas import ( + IntervalIndex, Series, Timedelta, date_range, - interval_range, read_csv, to_datetime, ) +from post_processing.utils.core_utils import round_begin_end_timestamps from post_processing.utils.filtering_utils import ( find_delimiter, ) @@ -101,7 +102,7 @@ def from_path( msg = f"CSV is missing required columns: {', '.join(sorted(missing))}" raise ValueError(msg) - # Normalize timezones: convert to UTC, then remove tz info (naive) + # Normalise timezones: convert to UTC, then remove tz info (naive) for col in [ "start_recording", "end_recording", @@ -134,11 +135,11 @@ def from_path( freq=origin, ) - # Initialize effort vector (0 = no recording, 1 = recording) + # Initialise effort vector (0 = no recording, 1 = recording) # Compare each timestamp to all intervals in a vectorized manner effort = Series(0, index=time_index) - # Vectorized interval coverage + # Vectorised interval coverage t_vals = time_index.to_numpy()[:, None] start_vals = df["effective_start_recording"].to_numpy() end_vals = df["effective_end_recording"].to_numpy() @@ -148,13 +149,12 @@ def from_path( effort[:] = covered.any(axis=1).astype(int) # Aggregate effort into user-defined bin_size - counts = effort.resample(bin_size).sum() + counts = effort.resample(bin_size, closed="left", label="left").sum() - # Replace index with IntervalIndex for downstream compatibility - counts.index = interval_range( - start=counts.index[0], - periods=len(counts), - freq=bin_size, + counts.index = IntervalIndex.from_arrays( + counts.index, + counts.index + + round_begin_end_timestamps(list(counts.index), bin_size)[-1], closed="left", ) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index c4f79fb..3a7f486 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -660,18 +660,11 @@ def shade_no_effort( index=[i.left for i in observed.counts.index], ) - effort_by_end = Series( - observed.counts.values, - index=[i.right for i in observed.counts.index], - ) - - combined_effort = 0.5 * effort_by_start.add(effort_by_end, fill_value=0) - - bar_width = combined_effort.index[1] - combined_effort.index[0] + bar_width = effort_by_start.index[1] - effort_by_start.index[0] width_days = bar_width.total_seconds() / 86400 max_effort = bar_width / observed.timebin_origin - effort_fraction = combined_effort / max_effort + effort_fraction = effort_by_start / max_effort first_elem = Series([0], index=[effort_fraction.index[0] - bar_width]) last_elem = Series([0], index=[effort_fraction.index[-1] + bar_width]) diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py index 41b9e59..73dd55e 100644 --- a/tests/test_recording_period.py +++ b/tests/test_recording_period.py @@ -1,10 +1,11 @@ -import pandas as pd +from pandas import Interval, Timestamp from pandas.tseries import frequencies +from post_processing.dataclass.detection_filter import DetectionFilter from post_processing.dataclass.recording_period import RecordingPeriod -def test_recording_period_with_gaps(recording_planning_config): +def test_recording_period_with_gaps(recording_planning_config: DetectionFilter) -> None: """RecordingPeriod correctly represents long gaps with no recording effort. 
The planning contains two recording blocks separated by ~3 weeks with no @@ -27,22 +28,23 @@ def test_recording_period_with_gaps(recording_planning_config): assert counts.index.is_interval() assert counts.min() >= 0 - # One week = 7 * 24 hours (origin = 1min) + # One week = 7 * 24 hours (origin = 1 min) full_week_minutes = 7 * 24 * 60 # ------------------------------------------------------------------ # Helper: find the bin covering a given timestamp # ------------------------------------------------------------------ - def bin_covering(ts: pd.Timestamp) -> pd.Interval: + def bin_covering(ts: Timestamp) -> Interval: for interval in counts.index: if interval.left <= ts < interval.right: return interval - raise AssertionError(f"No bin covers timestamp {ts}") + msg = f"No bin covers timestamp {ts}" + raise AssertionError(msg) # ------------------------------------------------------------------ # Week fully inside the long gap → zero effort # ------------------------------------------------------------------ - gap_ts = pd.Timestamp("2024-04-21") + gap_ts = Timestamp("2024-04-21") gap_bin = bin_covering(gap_ts) assert counts.loc[gap_bin] == 0 @@ -50,7 +52,7 @@ def bin_covering(ts: pd.Timestamp) -> pd.Interval: # ------------------------------------------------------------------ # Week fully inside recording → full effort # ------------------------------------------------------------------ - full_effort_ts = pd.Timestamp("2024-02-04") + full_effort_ts = Timestamp("2024-02-04") full_bin = bin_covering(full_effort_ts) assert counts.loc[full_bin] == full_week_minutes @@ -58,7 +60,7 @@ def bin_covering(ts: pd.Timestamp) -> pd.Interval: # ------------------------------------------------------------------ # Week overlapping recording stop → partial effort # ------------------------------------------------------------------ - partial_ts = pd.Timestamp("2024-04-14") + partial_ts = Timestamp("2024-04-14") partial_bin = bin_covering(partial_ts) - assert counts.loc[partial_bin] == 1560 + assert counts.loc[partial_bin] == 0 From dcb70fb141760c4d75a1678c2e3fd8ad7257a972 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:25:29 +0100 Subject: [PATCH 28/33] syntax --- src/post_processing/dataclass/recording_period.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/post_processing/dataclass/recording_period.py b/src/post_processing/dataclass/recording_period.py index f19fab2..d0d48b7 100644 --- a/src/post_processing/dataclass/recording_period.py +++ b/src/post_processing/dataclass/recording_period.py @@ -42,7 +42,7 @@ def from_path( *, bin_size: Timedelta | BaseOffset, ) -> RecordingPeriod: - """Vectorized creation of recording coverage from CSV with start/end datetimes. + """Vectorised creation of recording coverage from CSV with start/end datetimes. This method reads a CSV with columns: - "start_recording" @@ -61,7 +61,7 @@ def from_path( - `timestamp_file`: path to CSV - `timebin_origin`: Timedelta resolution of detections bin_size : Timedelta or BaseOffset - Size of the aggregation bin (e.g., Timedelta("1H") or "1D"). + Size of the aggregation bin (e.g. Timedelta("1H") or "1D"). 
Returns ------- @@ -121,7 +121,9 @@ def from_path( ].min(axis=1) # Remove rows with no actual recording interval - df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy() + df = df.loc[ + df["effective_start_recording"] < df["effective_end_recording"] + ].copy() if df.empty: msg = "No valid recording intervals after deployment intersection." @@ -136,7 +138,7 @@ def from_path( ) # Initialise effort vector (0 = no recording, 1 = recording) - # Compare each timestamp to all intervals in a vectorized manner + # Compare each timestamp to all intervals in a vectorised manner effort = Series(0, index=time_index) # Vectorised interval coverage From 47b170d89ecc620618bfd0e24533f7a0776d1202 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 20 Jan 2026 17:20:43 +0100 Subject: [PATCH 29/33] legend histo shade --- src/post_processing/utils/plot_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 3a7f486..8d12fa3 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -149,6 +149,7 @@ def histo( shade_no_effort( ax=ax, observed=effort, + legend=legend, ) if season: @@ -642,6 +643,7 @@ def set_plot_title(ax: plt.Axes, annotators: list[str], labels: list[str]) -> No def shade_no_effort( ax: plt.Axes, observed: RecordingPeriod, + legend: bool, ) -> None: """Shade areas of the plot where no observation effort was made. @@ -652,6 +654,8 @@ def shade_no_effort( observed : RecordingPeriod A Series with observation counts or flags, indexed by datetime. Should be aligned or re-indexable to `bin_starts`. + legend : bool + Wether to add the legend entry for the shaded regions. """ # Convert effort IntervalIndex → DatetimeIndex (bin starts) @@ -704,7 +708,7 @@ def shade_no_effort( handles_effort.append( Patch(facecolor="0.45", alpha=0.15, label="no data"), ) - if handles_effort: + if handles_effort and legend: labels_effort = [h.get_label() for h in handles_effort] handles = handles1 + handles_effort labels = labels1 + labels_effort From ef0e2b6474f8044de561f39a70372178892de9e5 Mon Sep 17 00:00:00 2001 From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com> Date: Thu, 22 Jan 2026 11:52:18 +0100 Subject: [PATCH 30/33] improve test_recording_period --- tests/test_recording_period.py | 82 ++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/tests/test_recording_period.py b/tests/test_recording_period.py index 73dd55e..064c6b5 100644 --- a/tests/test_recording_period.py +++ b/tests/test_recording_period.py @@ -1,5 +1,4 @@ -from pandas import Interval, Timestamp -from pandas.tseries import frequencies +from pandas import Timedelta, read_csv, to_datetime from post_processing.dataclass.detection_filter import DetectionFilter from post_processing.dataclass.recording_period import RecordingPeriod @@ -14,53 +13,68 @@ def test_recording_period_with_gaps(recording_planning_config: DetectionFilter) - weeks with partial effort, - weeks with zero effort. 
""" + histo_x_bin_size = Timedelta("7D") recording_period = RecordingPeriod.from_path( config=recording_planning_config, - bin_size=frequencies.to_offset("1W"), + bin_size=histo_x_bin_size, ) counts = recording_period.counts + origin = recording_planning_config.timebin_origin + nb_timebin_origin_per_histo_x_bin_size = int(histo_x_bin_size / origin) + # Computes effective recording intervals from recording planning csv + df_planning = read_csv( + recording_planning_config.timestamp_file, + parse_dates=[ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ], + ) + for col in [ + "start_recording", + "end_recording", + "start_deployment", + "end_deployment", + ]: + df_planning[col] = ( + to_datetime(df_planning[col], utc=True) + .dt.tz_convert(None) + ) + + df_planning["start"] = df_planning[ + ["start_recording", "start_deployment"] + ].max(axis=1) + df_planning["end"] = df_planning[ + ["end_recording", "end_deployment"] + ].min(axis=1) + + planning = df_planning.loc[df_planning["start"] < df_planning["end"]] # ------------------------------------------------------------------ # Structural checks # ------------------------------------------------------------------ assert not counts.empty assert counts.index.is_interval() assert counts.min() >= 0 - - # One week = 7 * 24 hours (origin = 1 min) - full_week_minutes = 7 * 24 * 60 - - # ------------------------------------------------------------------ - # Helper: find the bin covering a given timestamp - # ------------------------------------------------------------------ - def bin_covering(ts: Timestamp) -> Interval: - for interval in counts.index: - if interval.left <= ts < interval.right: - return interval - msg = f"No bin covers timestamp {ts}" - raise AssertionError(msg) + assert counts.max() <= nb_timebin_origin_per_histo_x_bin_size # ------------------------------------------------------------------ - # Week fully inside the long gap → zero effort + # Find overlap (number of timebin_origin) within each effective recording period # ------------------------------------------------------------------ - gap_ts = Timestamp("2024-04-21") + for interval in counts.index: + bin_start = interval.left + bin_end = interval.right - gap_bin = bin_covering(gap_ts) - assert counts.loc[gap_bin] == 0 + # Compute overlap with all recording intervals + overlap_start = planning["start"].clip(lower=bin_start, upper=bin_end) + overlap_end = planning["end"].clip(lower=bin_start, upper=bin_end) - # ------------------------------------------------------------------ - # Week fully inside recording → full effort - # ------------------------------------------------------------------ - full_effort_ts = Timestamp("2024-02-04") - - full_bin = bin_covering(full_effort_ts) - assert counts.loc[full_bin] == full_week_minutes - - # ------------------------------------------------------------------ - # Week overlapping recording stop → partial effort - # ------------------------------------------------------------------ - partial_ts = Timestamp("2024-04-14") + overlap = (overlap_end - overlap_start).clip(lower=Timedelta(0)) + expected_minutes = int(overlap.sum() / recording_planning_config.timebin_origin) - partial_bin = bin_covering(partial_ts) - assert counts.loc[partial_bin] == 0 + assert counts.loc[interval] == expected_minutes, ( + f"Mismatch for bin {interval}: " + f"expected {expected_minutes}, got {counts.loc[interval]}" + ) From 2a5a8c839088880a1f2cc307f07960eff8af925b Mon Sep 17 00:00:00 2001 From: Mathieu Dupont 
<108517594+mathieudpnt@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:03:51 +0100 Subject: [PATCH 31/33] refacto --- src/post_processing/dataclass/data_aplose.py | 23 +- src/post_processing/utils/core_utils.py | 44 +- src/post_processing/utils/filtering_utils.py | 25 +- src/post_processing/utils/fpod_utils.py | 691 ++++--------------- src/post_processing/utils/glider_utils.py | 2 +- src/post_processing/utils/plot_utils.py | 2 +- tests/test_fpod_utils.py | 4 +- user_case/example_FPOD-CPOD_raw.ipynb | 10 +- user_case/resource/CPOD-FPOD_yaml.yml | 6 +- user_case/user_case_CALAIS.ipynb | 17 +- 10 files changed, 207 insertions(+), 617 deletions(-) diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index e1d3cae..ce2d9da 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -431,14 +431,15 @@ def plot( season = kwargs.get("season", False) effort = kwargs.get("effort") - return scatter(df=df_filtered, - ax=ax, - time_range=time, - show_rise_set=show_rise_set, - season=season, - coordinates=self.coordinates, - effort=effort, - ) + return scatter( + df=df_filtered, + ax=ax, + time_range=time, + show_rise_set=show_rise_set, + season=season, + coordinates=self.coordinates, + effort=effort, + ) if mode == "agreement": bin_size = kwargs.get("bin_size") @@ -452,7 +453,11 @@ def plot( label, ) - return timeline(df=df_filtered, ax=ax, color=color) + return timeline( + df=df_filtered, + ax=ax, + color=color, + ) msg = f"Unsupported plot mode: {mode}" raise ValueError(msg) diff --git a/src/post_processing/utils/core_utils.py b/src/post_processing/utils/core_utils.py index c149155..d0125ae 100644 --- a/src/post_processing/utils/core_utils.py +++ b/src/post_processing/utils/core_utils.py @@ -8,7 +8,8 @@ import astral import easygui import numpy as np -from astral.sun import sunrise, sunset +from astral import LocationInfo +from astral.sun import sunrise, sunset, sun from matplotlib import pyplot as plt from osekit.config import TIMESTAMP_FORMAT_AUDIO_FILE from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text @@ -579,3 +580,44 @@ def timedelta_to_str(td: Timedelta) -> str: if seconds % 60 == 0: return f"{seconds // 60}min" return f"{seconds}s" + + +def assign_light_regime( + ts: Timestamp, + lat: float | None = None, + lon: float | None = None, +) -> DataFrame: + """Assign daylight regime to temporal events. + + Parameters + ---------- + ts: Timestamp + Timestamp to assign a light regime to. + lat: float + The latitude of corresponding point. + lon: float + The longitude of corresponding point. + + Returns + ------- + DataFrame + The same dataframe with the column daytime. + + """ + if not all([lat, lon]): + lat, lon = get_coordinates() + + # Get sun times for given location + location = LocationInfo(latitude=lat, longitude=lon) + s = sun(location.observer, date=ts.date()) + + if ts < s['dawn']: + return 'night' + elif ts < s['sunrise']: + return 'dawn' + elif ts < s['sunset']: + return 'day' + elif ts < s['dusk']: + return 'dusk' + else: + return 'night' diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index faf2fd7..661324b 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -510,8 +510,8 @@ def reshape_timebin( timebin_new: Timedelta The size of the new time bin. 
timestamp_audio: list[Timestamp] - A list of Timestamp objects corresponding to the start of each wav - that corresponds to a detection + A list of Timestamp objects corresponding to the shape + in which the data should be reshaped. Returns ------- @@ -571,17 +571,16 @@ def get_filename_timestamps(df: DataFrame, date_parser: str) -> list[Timestamp]: """ tz = get_timezone(df) - timestamps = [ - strptime_from_text( - ts, - datetime_template=date_parser, - ) for ts in df["filename"] - ] - - if all(t.tz is None for t in timestamps): - timestamps = [t.tz_localize(tz) for t in timestamps] - - return timestamps + try: + return [ + to_datetime( + ts, + format=date_parser, + ).tz_localize(tz) for ts in df["filename"] + ] + except ValueError: + msg = """Could not parse timestamps from `df["filename"]`.""" + raise ValueError(msg) from None def ensure_in_list(value: str, candidates: list[str], label: str) -> None: diff --git a/src/post_processing/utils/fpod_utils.py b/src/post_processing/utils/fpod_utils.py index d36bb89..12cee28 100644 --- a/src/post_processing/utils/fpod_utils.py +++ b/src/post_processing/utils/fpod_utils.py @@ -15,9 +15,10 @@ from pandas import ( DataFrame, DateOffset, + Series, Timedelta, + Timestamp, concat, - date_range, notna, read_csv, to_datetime, @@ -25,7 +26,7 @@ to_timedelta, ) -from post_processing.utils.core_utils import get_coordinates, get_sun_times +from post_processing.utils.filtering_utils import find_delimiter from user_case.config import season_color, site_colors if TYPE_CHECKING: @@ -40,9 +41,9 @@ def pod2aplose( dataset_name: str, annotation: str, annotator: str, - bin_size: int = 60, + bin_size: Timedelta, ) -> DataFrame: - """Format PODs DataFrame to match APLOSE format. + """Format PODs DataFrame to match an APLOSE format. Parameters ---------- @@ -56,7 +57,7 @@ def pod2aplose( annotation name. annotator: str annotator name. - bin_size: int + bin_size: Timedelta Duration of the detections in seconds. Returns @@ -65,22 +66,19 @@ def pod2aplose( An APLOSE formatted DataFrame. """ - df = df.copy() - df["_temp_dt"] = [ - tz.localize(strptime_from_text(entry, "%d/%m/%Y %H:%M")) - for entry in df["ChunkEnd"] + df["Datetime"] = [ + tz.localize(entry) + for entry in df["Datetime"] ] - df = df.sort_values("_temp_dt").reset_index(drop=True) - - fpod_start_dt = df["_temp_dt"].tolist() - fpod_end_dt = [entry + Timedelta(seconds=bin_size) for entry in fpod_start_dt] + fpod_start_dt = df["Datetime"] + fpod_end_dt = [entry + bin_size for entry in df["Datetime"]] data = { "dataset": [dataset_name] * len(df), "filename": [strftime_osmose_format(entry) for entry in fpod_start_dt], "start_time": [0] * len(df), - "end_time": [bin_size] * len(df), + "end_time": [bin_size.total_seconds()] * len(df), "start_frequency": [0] * len(df), "end_frequency": [0] * len(df), "annotation": [annotation] * len(df), @@ -88,304 +86,121 @@ def pod2aplose( "start_datetime": [strftime_osmose_format(entry) for entry in fpod_start_dt], "end_datetime": [strftime_osmose_format(entry) for entry in fpod_end_dt], "is_box": [0] * len(df), - "deploy.name": df["deploy.name"].tolist(), + "deploy": df["Deploy"].tolist(), } return DataFrame(data) -def csv_folder( - folder_path: Path, - sep: str = ";", - encoding: str = "latin-1", +def load_pod_folder( + folder: Path, + ext: str, ) -> DataFrame: - """Process all CSV files from a folder. + """Read POD's result files from a folder. Parameters ---------- - folder_path: Path + folder: Path Folder's place. - sep: str, default=";" - Column separator. 
- encoding: str, default="latin-1" - File encoding. + ext: str + File extension of result files. Returns ------- DataFrame - Concatenated data with optional filename column. + Concatenated data. Raises ------ ValueError - If no CSV files found. + If no result files are found. """ - all_files = list(folder_path.rglob("*.csv")) - - if not all_files: - msg = f"No .csv files found in {folder_path}" + if ext not in {"csv", "txt"}: + msg = f"Invalid file extension: {ext}" raise ValueError(msg) - all_data = [] - for file in all_files: - df = read_csv(file, sep=sep, encoding=encoding, dtype="O") - df["deploy.name"] = file.stem - all_data.append(df) - - return concat(all_data, ignore_index=True) - - -def txt_folder( - folder_path: Path, - sep: str = "\t", -) -> DataFrame: - r"""Process all TXT files from a folder. - - Parameters - ---------- - folder_path: Path - Folder's place. - sep: str, default="\t" - Column separator. - - Returns - ------- - DataFrame - Concatenated data from all TXT files. - - """ - all_files = list(Path(folder_path).rglob("*.txt")) + all_files = sorted(folder.rglob(f"*.{ext}")) if not all_files: - msg = f"No .txt files found in {folder_path}" + msg = f"No .{ext} files found in {folder}" raise ValueError(msg) all_data = [] for file in all_files: - file_path = folder_path / file - df = read_csv(file_path, sep=sep) - all_data.append(df) - - return concat(all_data, ignore_index=True) - + sep = find_delimiter(file) + df = read_csv( + file, + sep=sep, + ) -def parse_timestamps( - df: DataFrame, - col_timestamp: str, - date_formats: list[str] | None = None, -) -> DataFrame: - """Parse timestamp column with multiple possible formats. + df["Deploy"] = file.stem.strip().lower().replace(" ", "_") + all_data.append(df) - Parameters - ---------- - df: DataFrame - Input dataframe. - col_timestamp: str - Name of the timestamp column to parse. - date_formats: list[str], optional - List of strptime formats to try. If None, uses common formats. + data = concat(all_data, ignore_index=True) - Returns - ------- - DataFrame - Copy of df with parsed timestamps. + if ext == "csv": + if "%TimeLost" in data.columns: + data_filtered = data[data["File"].notna()].copy() + data_filtered = data_filtered[data_filtered["Nall/m"].notna()] + else: + data_filtered = data[data["DPM"] > 0].copy() + data_filtered = data_filtered[data_filtered["Nall"].notna()] - """ - if date_formats is None: - date_formats = [ - "%Y-%m-%dT%H:%M:%S.%f%z", - "%Y-%m-%dT%H:%M:%S:%Z", - "%Y-%m-%dT%H:%M:%S.%f", - "%Y-%m-%dT%H:%M:%S", - "%Y-%m-%d %H:%M:%S.%f", - "%d/%m/%Y %H:%M", + data_filtered["Datetime"] = [ + strptime_from_text(dt, "%d/%m/%Y %H:%M") for dt in data_filtered["ChunkEnd"] ] + return data_filtered.sort_values(by=["Datetime"]).reset_index(drop=True) - df = df.copy() - df[col_timestamp] = df[col_timestamp].apply( - lambda x: strptime_from_text(x, date_formats)) - return df - - -def required_columns( - df: DataFrame, - columns: list[str], -) -> None: - """Validate that required columns exist in dataframe. - - Parameters - ---------- - df: DataFrame - Table to validate. - columns : list[str] - List of required column names. - - Raises - ------ - ValueError - If any required column is missing. 
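# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: one possible way to call the new
# load_pod_folder() introduced above. The folder path is a made-up placeholder;
# only the "csv" and "txt" extensions are accepted by the helper.
from pathlib import Path

from post_processing.utils.fpod_utils import load_pod_folder

# Concatenate every *.csv export found under the folder, tag each row with a
# "Deploy" name derived from the file stem, keep the relevant rows depending on
# the available columns, and parse "ChunkEnd" into a sorted "Datetime" column.
dpm = load_pod_folder(Path("path/to/pod_csv_exports"), "csv")
print(dpm[["Datetime", "Deploy"]].head())
# ---------------------------------------------------------------------------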
+ if ext == "txt": + pod_type = {Path(f).suffix.lower().strip(".p3") for f in data["File"]} - """ - for col in columns: - if col not in df.columns: - msg = f"'{col}' not found in {df}" + if len(pod_type) != 1: + msg = f"Multiple POD types found in {folder}: {pod_type}" raise ValueError(msg) + pod_type = pod_type.pop() -def create_mask( - df: DataFrame, - col_timestamp: str, - col_start: str, - col_end: str, -) -> DataFrame: - """Filter rows to keep only those within deployment period. - - Parameters - ---------- - df : DataFrame - Table with timestamp and deployment period columns. - col_timestamp : str - Name of timestamp column. - col_start : str - Name of deployment start date column. - col_end : str - Name of deployment end date column. - - Returns - ------- - DataFrame - Filtered dataframe with rows in deployment periods. - - """ - mask = ( - (df[col_timestamp] >= df[col_start]) - & (df[col_timestamp] <= df[col_end]) - & df[col_timestamp].notna() - & df[col_start].notna() - & df[col_end].notna() - ) - return df[mask].copy() - - -def meta_cut_aplose( - raw_data: DataFrame, - metadata: DataFrame, -) -> DataFrame: - """Filter data to keep only rows within deployment periods. - - Parameters - ---------- - raw_data : DataFrame - Table containing deployment name and timestamps. - metadata : DataFrame - Metadata with deployment periods (start/end dates). - - Returns - ------- - DataFrame - Filtered data with only rows within deployment periods. - - """ - required_columns( - raw_data, ["deploy.name", "start_datetime"]) - required_columns( - metadata, ["deploy.name", "deployment_date", "recovery_date"]) - - raw = parse_timestamps(raw_data, "start_datetime") - raw = raw.sort_values(["start_datetime"]) - - dfm = raw.merge( - metadata[["deploy.name", "deployment_date", "recovery_date"]], - on="deploy.name", - how="left", - ) - - out = create_mask(dfm, "start_datetime", "deployment_date", "recovery_date") - - columns_to_drop = [ - col for col in ["deployment_date", "recovery_date"] if col not in raw_data. - columns] - if columns_to_drop: - out = out.drop(columns=columns_to_drop) - - return out.sort_values(["start_datetime"]).reset_index(drop=True) - - -def add_utc( - df: DataFrame, - cols: list, - fr: str = "h", -) -> DataFrame: - """Create a DataFrame with one line per hour between start and end dates. - - Keep the number of detections per hour between these dates. - - Parameters - ---------- - df: DataFrame - Metadata dataframe with deployments information (previously exported as json). - cols:list - Timestamp column names. - fr:str - Frequency of the range of detections. - - Returns - ------- - DataFrame - A full period of time with positive and negative hours to detections. - - """ - for col in df[cols]: - df[col] = to_datetime(df[col], utc=True) - df[col] = df[col].dt.floor(fr) - return df - - -def build_range( - df: DataFrame, - fr: str = "h", -) -> DataFrame: - """Create a DataFrame with one line per hour between start and end dates. + data["Datetime"] = data.apply( + lambda row: get_feeding_buzz_datetime(row, pod=f"{pod_type}"), + axis=1, + ) + return data.sort_values(by=["Datetime"]).reset_index(drop=True) - Keep the number of detections per hour between these dates. + msg = f"Could not load {ext} result folder" + raise ValueError(msg) - Parameters - ---------- - df: DataFrame - Metadata dataframe with deployments information (previously exported as json) - fr:str - Frequency of the range of detections. 
- Returns - ------- - DataFrame - A full period of time with positive and negative hours to detections. +def get_feeding_buzz_datetime(row: Series, pod: str) -> Timestamp: + """Convert feeding buzz timestamp into a standard Timestamp. + The conversion method differs based on the POD type. """ - add_utc(df, ["Deb", "Fin"], fr) + if pod not in {"c", "f"}: + msg = f"Invalid POD type: {pod}" + raise ValueError(msg) - all_ranges = [] - for _, row in df.iterrows(): - hours = date_range(row["Deb"], row["Fin"], freq=fr) - tmp = DataFrame( - { - "deploy.name": row["deploy.name"], - "start_datetime": hours, - }, + if pod == "f": + return ( + to_datetime("1900-01-01") + + to_timedelta(row["Minute"], unit="min") + + to_timedelta(row["microsec"] / 1e6, unit="sec") - + to_timedelta(2, unit="D") ) - all_ranges.append(tmp) - return concat(all_ranges, ignore_index=True) + return strptime_from_text( + f"{row["Minute"]}:{int(str(row["microsec"])[0]):02d}.{int(str(row["microsec"])[1:])}", + "%-d/%-m/%Y %H:%M:%S.%f", + ) -def feeding_buzz( +def process_feeding_buzz( df: DataFrame, species: str, ) -> DataFrame: - """Process a CPOD/FPOD feeding buzz detection file. + """Process a POD feeding buzz detection DataFrame. - Gives the feeding buzz duration, depending on the studied species. + Give the feeding buzz duration, depending on the studied species. Parameters ---------- @@ -397,322 +212,69 @@ def feeding_buzz( Returns ------- DataFrame - Containing all ICIs for every positive minutes to clicks + Containing all ICIs for every positive minute to click """ - df["microsec"] /= 1e6 - - try: - df["Minute"].astype(int) - df["datetime"] = ( - to_datetime("1900-01-01") - + to_timedelta(df["Minute"], unit="min") - + to_timedelta(df["microsec"], unit="sec") - - to_timedelta(2, unit="D") - ) - df["start_datetime"] = df["datetime"].dt.floor("min") - except (ValueError, TypeError): - df["datetime"] = ( - to_datetime(df["Minute"], dayfirst=True) - + to_timedelta(df["microsec"], unit="sec") - ) - df["start_datetime"] = to_datetime(df["Minute"], dayfirst=True) - - df["ICI"] = df["datetime"].diff() - - if species == "Dauphin": # Herzing et al., 2014 - df["Buzz"] = df["ICI"].between(Timedelta(0), - Timedelta(seconds=0.02)).astype(int) - elif species == "Marsouin": # Nuuttila et al., 2013 - df["Buzz"] = df["ICI"].between(Timedelta(0), - Timedelta(seconds=0.01)).astype(int) - elif species == "Commerson": # Reyes Reyes et al., 2015 - df["Buzz"] = df["ICI"].between(Timedelta(0), - Timedelta(seconds=0.005)).astype(int) + df["Datetime"] = df["Datetime"].dt.floor("min") + df["ICI"] = df["Datetime"].diff() + + if species.lower() == "delphinid": # Herzing et al., 2014 + df["Buzz"] = df["ICI"].between( + Timedelta(0), + Timedelta(seconds=0.02), + ).astype(int) + elif species.lower() == "porpoise": # Nuuttila et al., 2013 + df["Buzz"] = df["ICI"].between( + Timedelta(0), + Timedelta(seconds=0.01), + ).astype(int) + elif species.lower() == "commerson": # Reyes Reyes et al., 2015 + df["Buzz"] = df["ICI"].between( + Timedelta(0), + Timedelta(seconds=0.005), + ).astype(int) else: msg = "This species is not supported" raise ValueError(msg) - f = df.groupby(["start_datetime"])["Buzz"].sum().reset_index() + df_buzz = df.groupby(["Datetime"])["Buzz"].sum().reset_index() + df_buzz["Foraging"] = to_numeric( + df_buzz["Buzz"] != 0, downcast="integer" + ).astype(int) - f["Foraging"] = to_numeric(f["Buzz"] != 0, downcast="integer").astype(int) + return df_buzz - return f +def process_timelost(df: DataFrame, threshold: int = 0) -> DataFrame: + 
"""Process TimeLost DataFrame. -def assign_daytime( - df: DataFrame, -) -> DataFrame: - """Assign datetime categories to temporal events. - - Categorize daytime of the detection (among 4 categories). + Returns relevant columns and reshape into hourly data. Parameters ---------- df: DataFrame - Contains positive hours to detections. - - Returns - ------- - DataFrame - The same dataframe with the column daytime. - - """ - df["start_datetime"] = to_datetime(df["start_datetime"], utc=True) - start = df["start_datetime"].min() - stop = df["start_datetime"].max() - lat, lon = get_coordinates() - sunrise, sunset = get_sun_times(start, stop, lat, lon) - - sun_times = DataFrame( - {"date": date_range(start, stop, freq="D"), - "sunrise": [Timedelta(h, "hours") for h in sunrise], - "sunset": [Timedelta(h, "hours") for h in sunset], - }) - - sun_times["sunrise"] = sun_times["date"].dt.floor("D") + sun_times["sunrise"] - sun_times["sunset"] = sun_times["date"].dt.floor("D") + sun_times["sunset"] - - for i, row in df.iterrows(): - dpm_i = row["start_datetime"] - if notna(dpm_i): # Check if time is not NaN - jour_i = sun_times[ - (sun_times["sunrise"].dt.year == dpm_i.year) - & (sun_times["sunrise"].dt.month == dpm_i.month) - & (sun_times["sunrise"].dt.day == dpm_i.day) - ] - if not jour_i.empty: # Ensure there's a matching row - jour_i = jour_i.iloc[0] # Extract first match - if (dpm_i <= jour_i["sunrise"]) | (dpm_i > jour_i["sunset"]): - df.loc[i, "REGIME"] = 1 - else: - df.loc[i, "REGIME"] = 2 - - return df - - -def is_dpm_col( - df: DataFrame, -) -> DataFrame: - """Ensure DPM column exists with default value of 1. - - Parameters - ---------- - df: DataFrame - Input dataframe. - - Returns - ------- - DataFrame - Copy of df with DPM column. - - """ - df = df.copy() - if "DPM" not in df.columns: - df["DPM"] = 1 - return df - - -def resample_dpm( - df: DataFrame, - frq: str, - cols: dict[str, str], - group_by: list[str] | None = None, -) -> DataFrame: - """Resample DPM data to specified time frequency. - - Aggregates Detection Positive Minutes (DPM) by time period, - optionally preserving grouping columns like deployment name. - - Parameters - ---------- - df: DataFrame - CPOD result DataFrame with DPM data. - frq: str - Pandas frequency string: "D" (day), "h" (hour), "10min", etc. - cols: dict[str, str] - Dictionary of column names and to process them. - group_by: list[str], optional - Columns to group by (e.g., ["deploy.name", "start_datetime"]). - If None, groups only by start_datetime. - - Returns - ------- - DataFrame - Resampled DataFrame with aggregated DPM values. - - Examples - -------- - >>> # Daily aggregation per deployment - >>> resample_dpm(df, "D", {"Foraging":"sum"}, group_by=["deploy.name"]) - - >>> # Hourly aggregation with site info preserved - >>> resample_dpm(df, "h", cols={"DPM":"sum","deploy.name":"first"}) - - """ - df = is_dpm_col(df) - df = add_utc(df, ["start_datetime"], frq) - - # Determine grouping columns - if group_by is None: - group_by = ["start_datetime"] - - return df.groupby(group_by).agg(cols).reset_index() - - -def deploy_period( - df: DataFrame, - col_timestamp: str = "start_datetime", - col_deployment: str = "deploy.name", -) -> DataFrame: - """Extract start and end timestamps for each deployment. - - Parameters - ---------- - df: DataFrame - Input dataframe with parsed timestamps. - col_timestamp: str, default="start_datetime" - Name of the timestamp column. - col_deployment: str, default="deploy.name" - Name of the deployment identifier column. 
- - Returns - ------- - DataFrame - Table with columns: [col_deployment, 'Deb', 'Fin']. - - """ - return ( - df.groupby([col_deployment]) - .agg(Deb=(col_timestamp, "first"), Fin=(col_timestamp, "last")) - .reset_index() - ) - - -def first_last( - df: DataFrame, - col_timestamp: str = "start_datetime", - col_deployment: str = "deploy.name", - date_formats: list[str] | None = None, -) -> DataFrame: - """Isolate beginning and end of every deployment. - - Parameters - ---------- - df: DataFrame - CPOD result DataFrame. - col_timestamp: str, default="start_datetime" - Name of the timestamps' column. - col_deployment: str, default="deploy.name" - Name of the deployment identifier column. - date_formats: list[str], optional - List of date formats to try for parsing. - - Returns - ------- - DataFrame - Table with deployment periods (Deb, Fin). - - """ - df_parsed = parse_timestamps(df, col_timestamp, date_formats) - return deploy_period(df_parsed, col_timestamp, col_deployment) - - -def actual_data( - df: DataFrame, - meta: DataFrame, -) -> DataFrame: - """Create a table with beginning and end of every deployment using metadata. - - Parameters - ---------- - df: DataFrame - Contains beginning and end for every deployment. - meta: DataFrame - Contains metadata for every deployment. - - Returns - ------- - DataFrame - Table with corrected deployment periods (Deb, Fin). - - """ - required_columns( - df, ["deploy.name", "ChunkEnd"]) - required_columns( - meta, ["deploy.name", "deployment_date", "recovery_date"]) - - beg_end = first_last(df, "ChunkEnd") - - beg_end = add_utc(beg_end, ["Deb", "Fin"]) - - final = beg_end.merge(meta[["deployment_date", "recovery_date", "deploy.name"]], - on="deploy.name", how="left") - final.loc[final["Deb"] < final["deployment_date"], "Deb"] = final["deployment_date"] - final.loc[final["Fin"] > final["recovery_date"], "Fin"] = final["recovery_date"] - final.loc[final["Deb"] > final["Fin"], ["Deb", "Fin"]] = None - final = final.sort_values(by=["Deb"]) - return final.drop(["deployment_date", "recovery_date"], axis=1) - - -def process_tl(tl_files: Path) -> DataFrame: - """Process Environmental data extracted from cpod.exe to get a usable dataframe. - - Parameters - ---------- - tl_files: Path All your Environmental data files. + threshold: float + TimeLost threshold. Returns ------- %TimeLost DataFrame. """ - df = csv_folder(tl_files) - df = df.dropna() - df = parse_timestamps(df, "ChunkEnd") - df = add_utc(df, ["ChunkEnd"], "h") - df["start_datetime"] = df["ChunkEnd"] - - return df.sort_values(["start_datetime"]) - - -def filter_tl(df: DataFrame, tl: int) -> DataFrame: - """Remove lines with a %TimeLost superior to the chosen threshold. - - Parameters - ---------- - df: DataFrame - Table of data and associated TimeLost. - tl: int - TimeLost filter threshold. - - Returns - ------- - Filtered DataFrame with few %TimeLost. - - """ - df["%TimeLost"] = (df["%TimeLost"].fillna(tl)).astype(int) - - return df[df["%TimeLost"] < tl] - - -def preserved_data(filtered_df: DataFrame, whole_df: DataFrame) -> float: - """Calculate the percentage of preserved data. - - Parameters - ---------- - filtered_df: DataFrame - Result of filter_tl. - whole_df: DataFrame - Table before filtering. - - Returns - ------- - Percentage of preserved data. + if threshold not in range(0, 100): + msg = "Threshold must be an integer between 0 and 100." 
+ raise ValueError(msg) - """ - return (len(filtered_df) / len(whole_df)) * 100 + df["Datetime"] = df["Datetime"].dt.floor("h") + cols_to_drop = [ + col for col in df.columns if col not in { + "File", "Datetime", "Temp", "Angle", "%TimeLost", "Deploy" + } + ] + return df[df["%TimeLost"] >= threshold].drop( + columns=cols_to_drop + ).sort_values(["Datetime"]).reset_index(drop=True) def create_matrix( @@ -747,27 +309,6 @@ def create_matrix( return matrix -def extract_site( - df: DataFrame, -) -> DataFrame: - """Create new columns: site.name and campaign.name, in order to match the metadata. - - Parameters - ---------- - df: DataFrame - All values concatenated - - Returns - ------- - DataFrame - The same dataframe with two additional columns. - - """ - required_columns(df, ["deploy.name"]) - df[["site.name", "campaign.name"]] = df["deploy.name"].str.split("_", expand=True) - return df - - def percent_calc( data: DataFrame, time_unit: str | None = None, diff --git a/src/post_processing/utils/glider_utils.py b/src/post_processing/utils/glider_utils.py index 626371c..aaa0224 100644 --- a/src/post_processing/utils/glider_utils.py +++ b/src/post_processing/utils/glider_utils.py @@ -175,7 +175,7 @@ def load_glider_nav(directory: Path) -> DataFrame: msg = f"Directory '{directory}' does not exist." raise FileNotFoundError(msg) - file = [f for f in directory.glob("*.gz") if "gli" in f.name] + file = [f for f in directory.rglob("*.gz") if "gli" in f.name] if not len(file) > 0: msg = f"Directory '{directory}' does not contain '.gz' files." diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 8d12fa3..79c7673 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -577,7 +577,7 @@ def timeline( ax.grid(color="k", linestyle="-", linewidth=0.2) ax.set_yticks(np.arange(0, len(labels), 1)) - ax.set_yticklabels(labels[::-1]) + ax.set_yticklabels(labels) ax.set_xlabel("Date") ax.set_xlim( df["start_datetime"].min().floor("1d"), diff --git a/tests/test_fpod_utils.py b/tests/test_fpod_utils.py index a0ab178..d1c4ece 100644 --- a/tests/test_fpod_utils.py +++ b/tests/test_fpod_utils.py @@ -4,7 +4,7 @@ from pandas import DataFrame from post_processing.utils.fpod_utils import ( - csv_folder, + load_pod_folder, pod2aplose, ) @@ -142,7 +142,7 @@ def test_csv_folder_single_file(tmp_path) -> None: csv_file = tmp_path / "data.csv" csv_file.write_text("col1;col2\nval1;val2\nval3;val4", encoding="latin-1") - result = csv_folder(tmp_path) + result = load_pod_folder(tmp_path) assert isinstance(result, DataFrame) assert len(result) == 2 diff --git a/user_case/example_FPOD-CPOD_raw.ipynb b/user_case/example_FPOD-CPOD_raw.ipynb index 60e7f67..8412c3b 100644 --- a/user_case/example_FPOD-CPOD_raw.ipynb +++ b/user_case/example_FPOD-CPOD_raw.ipynb @@ -15,7 +15,7 @@ "\n", "import pytz\n", "\n", - "from post_processing.utils.fpod_utils import csv_folder, pod2aplose, actual_data, meta_cut_aplose, feeding_buzz, txt_folder, add_utc\n", + "from post_processing.utils.fpod_utils import load_pod_folder, pod2aplose, actual_data, filter_by_metadatax, process_feeding_buzz, load_pod_txt, add_utc\n", "from post_processing.utils.core_utils import json2df\n", "\n", "import logging\n", @@ -54,7 +54,7 @@ "cell_type": "code", "source": [ "pod_files = Path(r\"U:\\CA4\") #Path to your data folder. 
🐬\n", - "path = csv_folder(pod_files) #Process all your POD.csv files.\n", + "path = load_pod_folder(pod_files) #Process all your POD.csv files.\n", "\n", "fb_files = Path(r\"U:\\fb_CA4\") #Path to your click details folder. 🐬\n", "json = Path(r\"C:\\Users\\fouinel\\Downloads\\deployment_calais.json\") #Path to your metadata file. 🐬\n", @@ -180,7 +180,7 @@ } }, "cell_type": "code", - "source": "cleared = meta_cut_aplose(df_aplose, metadatax)", + "source": "cleared = filter_by_metadatax(df_aplose, metadatax)", "id": "895bd5a116918285", "outputs": [], "execution_count": 5 @@ -211,8 +211,8 @@ "metadata": {}, "cell_type": "code", "source": [ - "fb_all = txt_folder(fb_files) #Read all your FB.txt files.\n", - "fb_all = feeding_buzz(fb_all, \"Marsouin\") #Categorize the minutes (positive or not to FB detection). 🐬\n", + "fb_all = load_pod_txt(fb_files) #Read all your FB.txt files.\n", + "fb_all = process_feeding_buzz(fb_all, \"Marsouin\") #Categorize the minutes (positive or not to FB detection). 🐬\n", "add_utc(fb_all, [\"start_datetime\"], \"min\")" ], "id": "64f824e02131d90a", diff --git a/user_case/resource/CPOD-FPOD_yaml.yml b/user_case/resource/CPOD-FPOD_yaml.yml index 3e259cc..ded85b0 100644 --- a/user_case/resource/CPOD-FPOD_yaml.yml +++ b/user_case/resource/CPOD-FPOD_yaml.yml @@ -6,7 +6,7 @@ # - Several csv files can be loaded at once, to perform this copy the template and paste it at the end of the present file. ######################################### -'U:\APLOSE_A1112.csv': +'C:\Users\dupontma2\Downloads\data_aplose.csv': # timebin: detection time resolution in seconds 'timebin_new': null # begin datetime: '2000-01-01T00:00:00+0000' @@ -16,7 +16,7 @@ # annotator: ['annotator1', 'annotator2'] 'annotator': null # annotation: ['annotation1', 'annotation2'] - 'annotation': "Commerson" + 'annotation': "Marsouin" # box: boolean value, whether to keep strong annotations 'box': False # timestamp file @@ -29,3 +29,5 @@ 'f_max': null # score 'score': null + # filename format + 'filename_format': '%Y-%m-%dT%H:%M:%S.%f%z' \ No newline at end of file diff --git a/user_case/user_case_CALAIS.ipynb b/user_case/user_case_CALAIS.ipynb index a5e49a3..f0f109a 100644 --- a/user_case/user_case_CALAIS.ipynb +++ b/user_case/user_case_CALAIS.ipynb @@ -6,8 +6,8 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2025-10-23T15:42:49.393544Z", - "start_time": "2025-10-23T15:42:47.415071Z" + "end_time": "2026-01-21T13:29:51.583226Z", + "start_time": "2026-01-21T13:29:43.385674Z" } }, "source": [ @@ -21,8 +21,8 @@ ")\n", "\n", "from post_processing.dataclass.data_aplose import DataAplose\n", - "from post_processing.utils.fpod_utils import csv_folder, pod2aplose, actual_data, meta_cut_aplose, resample_dpm, txt_folder, build_range, extract_site, percent_calc, site_percent, year_percent, ym_percent, month_percent, hour_percent, \\\n", - " feeding_buzz\n", + "from post_processing.utils.fpod_utils import load_pod_folder, pod2aplose, actual_data, filter_by_metadatax, resample_dpm, load_pod_txt, build_range, extract_site, percent_calc, site_percent, year_percent, ym_percent, month_percent, hour_percent, \\\n", + " process_feeding_buzz\n", "from post_processing.utils.core_utils import json2df,get_season" ], "outputs": [], @@ -53,7 +53,7 @@ "cell_type": "code", "source": [ "pod_files = Path(r\"U:\\Walde\")\n", - "path = csv_folder(pod_files)\n", + "path = load_pod_folder(pod_files)\n", "print(path.head())\n", "\n", "df_0 = path.dropna()\n", @@ -253,7 +253,7 @@ "source": [ "\n", "\n", - "cleared = 
meta_cut_aplose(df_aplose, metadatax) #Remove lines captures outside the instrument submersion." + "cleared = filter_by_metadatax(df_aplose, metadatax) #Remove lines captures outside the instrument submersion." ], "id": "ed6a06c522aea169", "outputs": [], @@ -347,8 +347,8 @@ "cell_type": "code", "source": [ "fb_files = Path(r\"U:\\fb_fpod_cetiroise_c\") #Path to your click details folder.\n", - "fb_all = txt_folder(fb_files)\n", - "fb_all = feeding_buzz(fb_all, \"Marsouin\")\n", + "fb_all = load_pod_txt(fb_files)\n", + "fb_all = process_feeding_buzz(fb_all, \"Marsouin\")\n", "\n", "fb_all[\"start_datetime\"] = fb_all[\"start_datetime\"].dt.floor(frq)\n", "fb = fb_all.groupby(\"start_datetime\")[\"Foraging\"].sum().reset_index()\n", @@ -363,6 +363,7 @@ "cell_type": "code", "source": [ "d_tot = resamp.merge(fb, on=\"start_datetime\", how=\"left\")\n", + "\n", "#This function aims to reindent 0 between the positive detections. It will be useful to produce first visualization graphs and use this dataset in R.\n", "d_hour = build_range(d_beg_end, frq)" ], From f0adf5bb77d7d4154741bca36b0f1b89f199ba92 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:16:31 +0100 Subject: [PATCH 32/33] refacto --- src/post_processing/utils/filtering_utils.py | 22 +- src/post_processing/utils/fpod_utils.py | 124 ++-- src/post_processing/utils/plot_utils.py | 2 + user_case/example_FPOD-CPOD_aplose.ipynb | 287 --------- .../example_FPOD-CPOD_firstresults.ipynb | 251 -------- user_case/example_FPOD-CPOD_raw.ipynb | 420 ------------- user_case/resource/CPOD-FPOD_yaml.yml | 33 - user_case/user_case_CALAIS.ipynb | 581 ------------------ 8 files changed, 82 insertions(+), 1638 deletions(-) delete mode 100644 user_case/example_FPOD-CPOD_aplose.ipynb delete mode 100644 user_case/example_FPOD-CPOD_firstresults.ipynb delete mode 100644 user_case/example_FPOD-CPOD_raw.ipynb delete mode 100644 user_case/resource/CPOD-FPOD_yaml.yml delete mode 100644 user_case/user_case_CALAIS.ipynb diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index 661324b..a9e0619 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -485,7 +485,8 @@ def _process_annotator_label_pair( time_vector[i] for i, detected in enumerate(detect_vec) if detected ] file_vector = [ - filename_vector[i] for i, detected in enumerate(detect_vec) if detected + # filename_vector[i] for i, detected in enumerate(detect_vec) if detected + filename_vector[i + 1] for i, detected in enumerate(detect_vec) if detected ] if not start_datetime: @@ -571,16 +572,17 @@ def get_filename_timestamps(df: DataFrame, date_parser: str) -> list[Timestamp]: """ tz = get_timezone(df) - try: - return [ - to_datetime( + timestamps = [ + strptime_from_text( ts, - format=date_parser, - ).tz_localize(tz) for ts in df["filename"] - ] - except ValueError: - msg = """Could not parse timestamps from `df["filename"]`.""" - raise ValueError(msg) from None + datetime_template=date_parser, + ) for ts in df["filename"] + ] + + if all(t.tz is None for t in timestamps): + timestamps = [t.tz_localize(tz) for t in timestamps] + + return timestamps def ensure_in_list(value: str, candidates: list[str], label: str) -> None: diff --git a/src/post_processing/utils/fpod_utils.py b/src/post_processing/utils/fpod_utils.py index 12cee28..cd13e58 100644 --- a/src/post_processing/utils/fpod_utils.py +++ 
b/src/post_processing/utils/fpod_utils.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -from pathlib import Path from typing import TYPE_CHECKING import matplotlib.dates as mdates @@ -30,6 +29,8 @@ from user_case.config import season_color, site_colors if TYPE_CHECKING: + from pathlib import Path + import pytz logger = logging.getLogger(__name__) @@ -66,26 +67,27 @@ def pod2aplose( An APLOSE formatted DataFrame. """ - df["Datetime"] = [ + fpod_start_dt = [ tz.localize(entry) for entry in df["Datetime"] ] - fpod_start_dt = df["Datetime"] - fpod_end_dt = [entry + bin_size for entry in df["Datetime"]] - data = { "dataset": [dataset_name] * len(df), - "filename": [strftime_osmose_format(entry) for entry in fpod_start_dt], + "filename": list(fpod_start_dt), "start_time": [0] * len(df), "end_time": [bin_size.total_seconds()] * len(df), "start_frequency": [0] * len(df), "end_frequency": [0] * len(df), "annotation": [annotation] * len(df), "annotator": [annotator] * len(df), - "start_datetime": [strftime_osmose_format(entry) for entry in fpod_start_dt], - "end_datetime": [strftime_osmose_format(entry) for entry in fpod_end_dt], - "is_box": [0] * len(df), + "start_datetime": [ + strftime_osmose_format(entry.floor(bin_size)) for entry in fpod_start_dt + ], + "end_datetime": [ + strftime_osmose_format(entry.ceil(bin_size)) for entry in fpod_start_dt + ], + "type": ["WEAK"] * len(df), "deploy": df["Deploy"].tolist(), } @@ -140,58 +142,67 @@ def load_pod_folder( data = concat(all_data, ignore_index=True) if ext == "csv": - if "%TimeLost" in data.columns: - data_filtered = data[data["File"].notna()].copy() - data_filtered = data_filtered[data_filtered["Nall/m"].notna()] - else: - data_filtered = data[data["DPM"] > 0].copy() - data_filtered = data_filtered[data_filtered["Nall"].notna()] + return _process_csv_data(data) + if ext == "txt": + return _process_txt_data(data) - data_filtered["Datetime"] = [ - strptime_from_text(dt, "%d/%m/%Y %H:%M") for dt in data_filtered["ChunkEnd"] - ] - return data_filtered.sort_values(by=["Datetime"]).reset_index(drop=True) + msg = f"Could not load {ext} result folder" + raise ValueError(msg) - if ext == "txt": - pod_type = {Path(f).suffix.lower().strip(".p3") for f in data["File"]} - if len(pod_type) != 1: - msg = f"Multiple POD types found in {folder}: {pod_type}" - raise ValueError(msg) +def _process_csv_data(data: DataFrame) -> DataFrame: + """Process CSV data with filtering and datetime conversion.""" + data_filtered = _filter_csv_data(data) + data_filtered["Datetime"] = [ + strptime_from_text(dt, "%d/%m/%Y %H:%M") + for dt in data_filtered["ChunkEnd"] + ] + return data_filtered.sort_values(by=["Datetime"]).reset_index(drop=True) - pod_type = pod_type.pop() - data["Datetime"] = data.apply( - lambda row: get_feeding_buzz_datetime(row, pod=f"{pod_type}"), - axis=1, - ) - return data.sort_values(by=["Datetime"]).reset_index(drop=True) +def _filter_csv_data(data: DataFrame) -> DataFrame: + """Filter CSV data based on available columns.""" + if "%TimeLost" in data.columns: + data_filtered = data[data["File"].notna()].copy() + data_filtered = data_filtered[data_filtered["Nall/m"].notna()] + else: + data_filtered = data[data["DPM"] > 0].copy() + data_filtered = data_filtered[data_filtered["Nall"].notna()] - msg = f"Could not load {ext} result folder" - raise ValueError(msg) + return data_filtered + + +def _process_txt_data(data: DataFrame) -> DataFrame: + """Process TXT data with datetime conversion.""" + data["Datetime"] = 
data.apply(get_feeding_buzz_datetime, axis=1) + return data.drop_duplicates().sort_values(by=["Datetime"]).reset_index(drop=True) -def get_feeding_buzz_datetime(row: Series, pod: str) -> Timestamp: +def get_feeding_buzz_datetime(row: Series) -> Timestamp: """Convert feeding buzz timestamp into a standard Timestamp. The conversion method differs based on the POD type. """ - if pod not in {"c", "f"}: - msg = f"Invalid POD type: {pod}" - raise ValueError(msg) - - if pod == "f": + try: return ( - to_datetime("1900-01-01") + - to_timedelta(row["Minute"], unit="min") + - to_timedelta(row["microsec"] / 1e6, unit="sec") - - to_timedelta(2, unit="D") + to_datetime("1900-01-01") + + to_timedelta(row["Minute"], unit="min") + + to_timedelta(row["microsec"] / 1e6, unit="sec") + - to_timedelta(2, unit="D") ) + except (KeyError, TypeError, ValueError): + pass - return strptime_from_text( - f"{row["Minute"]}:{int(str(row["microsec"])[0]):02d}.{int(str(row["microsec"])[1:])}", - "%-d/%-m/%Y %H:%M:%S.%f", - ) + try: + return strptime_from_text( + f"{row['Minute']}:{int(str(row['microsec'])[0]):02d}.{int(str(row['microsec'])[1:])}", + "%-d/%-m/%Y %H:%M:%S.%f", + ) + except (KeyError, TypeError, ValueError): + pass + + msg = "Could not convert feeding buzz timestamp." + raise ValueError(msg) def process_feeding_buzz( @@ -200,7 +211,8 @@ def process_feeding_buzz( ) -> DataFrame: """Process a POD feeding buzz detection DataFrame. - Give the feeding buzz duration, depending on the studied species. + Give the feeding buzz duration, depending on the studied species + (`delphinid`, `porpoise` or `commerson`). Parameters ---------- @@ -215,8 +227,8 @@ def process_feeding_buzz( Containing all ICIs for every positive minute to click """ - df["Datetime"] = df["Datetime"].dt.floor("min") df["ICI"] = df["Datetime"].diff() + df["Datetime"] = df["Datetime"].dt.floor("min") if species.lower() == "delphinid": # Herzing et al., 2014 df["Buzz"] = df["ICI"].between( @@ -239,7 +251,7 @@ def process_feeding_buzz( df_buzz = df.groupby(["Datetime"])["Buzz"].sum().reset_index() df_buzz["Foraging"] = to_numeric( - df_buzz["Buzz"] != 0, downcast="integer" + df_buzz["Buzz"] != 0, downcast="integer", ).astype(int) return df_buzz @@ -262,18 +274,18 @@ def process_timelost(df: DataFrame, threshold: int = 0) -> DataFrame: %TimeLost DataFrame. """ - if threshold not in range(0, 100): - msg = "Threshold must be an integer between 0 and 100." + if threshold not in range(101): + msg = "Threshold must integer between 0 and 100." raise ValueError(msg) df["Datetime"] = df["Datetime"].dt.floor("h") cols_to_drop = [ col for col in df.columns if col not in { - "File", "Datetime", "Temp", "Angle", "%TimeLost", "Deploy" + "File", "Datetime", "Temp", "Angle", "%TimeLost", "Deploy", } ] - return df[df["%TimeLost"] >= threshold].drop( - columns=cols_to_drop + return df[df["%TimeLost"] <= threshold].drop( + columns=cols_to_drop, ).sort_values(["Datetime"]).reset_index(drop=True) @@ -313,7 +325,7 @@ def percent_calc( data: DataFrame, time_unit: str | None = None, ) -> DataFrame: - """Calculate percentage of clicks, feeding buzzes and positive hours to detection. + """Calculate the percentage of clicks, feeding buzzes and positive hours to detection. Computed on the entire effort and for every site. @@ -358,7 +370,7 @@ def percent_calc( def site_percent(df: DataFrame, metric: str) -> None: - """Plot a graph with percentage of minutes positive to detection for every site. + """Plot a graph with the percentage of minutes positive to detection for every site. 
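# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: chaining the refactored helpers
# above. The paths, the species name and the %TimeLost threshold are
# assumptions made up for the example.
from pathlib import Path

from post_processing.utils.fpod_utils import (
    load_pod_folder,
    process_feeding_buzz,
    process_timelost,
)

# Click-detail *.txt exports: one row per click with a parsed "Datetime" column.
clicks = load_pod_folder(Path("path/to/click_details"), "txt")
# Flag feeding buzzes from inter-click intervals (porpoise threshold, ICI < 10 ms).
buzzes = process_feeding_buzz(clicks, "porpoise")
# Environmental *.csv exports with a "%TimeLost" column: keep hours whose
# %TimeLost is at most 20, per the <= filter above.
env = process_timelost(load_pod_folder(Path("path/to/pod_csv_exports"), "csv"), threshold=20)
# ---------------------------------------------------------------------------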
Parameters ---------- diff --git a/src/post_processing/utils/plot_utils.py b/src/post_processing/utils/plot_utils.py index 79c7673..4cffcd6 100644 --- a/src/post_processing/utils/plot_utils.py +++ b/src/post_processing/utils/plot_utils.py @@ -239,6 +239,7 @@ def scatter( season = kwargs.get("season", False) coordinates = kwargs.get("coordinates", False) effort = kwargs.get("effort", False) + legend = kwargs.get("legend", False) _prepare_timeline_plot( df=df, @@ -282,6 +283,7 @@ def scatter( shade_no_effort( ax=ax, observed=effort, + legend=legend, ) diff --git a/user_case/example_FPOD-CPOD_aplose.ipynb b/user_case/example_FPOD-CPOD_aplose.ipynb deleted file mode 100644 index b0da1c8..0000000 --- a/user_case/example_FPOD-CPOD_aplose.ipynb +++ /dev/null @@ -1,287 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": { - "collapsed": true - }, - "source": [ - "from pathlib import Path\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from pandas import (\n", - " read_csv,\n", - ")\n", - "\n", - "from post_processing.dataclass.data_aplose import DataAplose\n", - "from post_processing.utils.fpod_utils import resample_dpm, build_range, process_tl, filter_tl, preserved_data" - ], - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Load data\n", - "DPM = Detection Positive Minutes \\\n", - "FB = Feeding buzzes \\\n", - "🐬 = input to modify\n" - ], - "id": "a97e19830123b732" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "yaml_file = Path(r\"resource\\CPOD-FPOD_yaml.yml\") #Change the file path in the yaml sheet.🐬\n", - "data_list = DataAplose.from_yaml(file=yaml_file)\n", - "\n", - "d_beg_end = read_csv(r\"U:\\Deb_Fin_CA4.csv\") #Beginning and end of recording for every phase. 🐬\n", - "\n", - "tl_path = Path(r\"U:\\TimeLost\\tl_ca4\\phases\")\n", - "tl_df = process_tl(tl_path)\n", - "tl_df = tl_df.drop_duplicates(subset=['ChunkEnd'], keep=\"first\")\n", - "tl_df[\"Angle\"] = (tl_df[\"Angle\"].replace(',', '.', regex=True)).astype(float)\n", - "tl_df[\"Temp\"] = (tl_df[\"Temp\"].replace(',', '.', regex=True)).astype(float)" - ], - "id": "7da2feb5958db1a9", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Resample your data\n", - "Here you need to choose the format in which you want to visualise the positive detections. This aims to replace the functionalities available in Chelonia's softwares." - ], - "id": "3bc57f4f638ad6dc" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "frq = \"h\" #Determine the format in which you want to visualise your data. Use \"D\", \"h\" or \"10min\". 🐬\n", - "tl = 100 #%TimeLost threshold. If you do not want to set a filter, set tl to 100." - ], - "id": "9b0a078a262ac7f2", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "resamp = resample_dpm(data_list.df, frq=frq, cols={\"DPM\":\"sum\",\"Foraging\":\"sum\",\"deploy.name\":\"first\"}) #Resample your DPMs according to the chosen frq.", - "id": "fa3847d80ccf49c3", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Add the effort\n", - "To analyze the data, add zeros to view it based on effort." 
- ], - "id": "b92537991aa4ac4b" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_0 = build_range(d_beg_end, frq) #Create a dataframe from beginning to end of every phase filled with 0s.\n", - "d_tot = d_0.merge(resamp, on=[\"start_datetime\", \"deploy.name\"], how=\"left\")" - ], - "id": "4d76089ef06c6fdb", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Add additional metrics columns", - "id": "e6a4623e4baf25b5" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_tot[[\"DPM\",\"Foraging\"]] = d_tot[[\"DPM\",\"Foraging\"]].fillna(0)\n", - "d_tot[\"FBR\"] = d_tot[\"Foraging\"] / d_tot[\"DPM\"] #The Feeding Buzz Ratio corresponds to the proportion of FB among the recorded clicks.\n", - "d_tot[\"FBR\"] = d_tot[\"FBR\"].fillna(0)\n", - "d_tot[f\"DP{frq}\"] = (d_tot[\"DPM\"] >0).astype(int) #1 if the hour counts at least 1 DPM, else 0." - ], - "id": "912268e5e997dbc6", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_tot[\"DPH_fb\"] = (d_tot[\"Foraging\"] >0).astype(int)\n", - "d_tot[\"FBR_h\"] = d_tot[\"DPH_fb\"] / d_tot[\"DPH\"]\n", - "d_tot[\"FBR_h\"] = d_tot[\"FBR_h\"].fillna(0)" - ], - "id": "23e3e4137d9e2a84", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Add time columns", - "id": "a775158ba810957a" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_tot = d_tot.assign(**{attr: getattr(d_tot[\"start_datetime\"].dt, attr.lower())\n", - " for attr in ['Year', 'Month', 'Day', 'Hour']})\n", - "d_tot['YM'] = d_tot[\"start_datetime\"].dt.to_period('M').dt.to_timestamp()\n", - "d_tot['Date'] = d_tot[\"start_datetime\"].dt.to_period('D').dt.to_timestamp()" - ], - "id": "62ce5a31ed0db25a", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Filter your data\n", - "Chose a threshold of %TimeLost to remove all data exceeding it." 
- ], - "id": "98d31fb21ffb9165" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "full_df = d_tot.merge(\n", - " tl_df[[\"start_datetime\", \"%TimeLost\", \"Angle\", \"Temp\"]],\n", - " on=\"start_datetime\", how=\"left\")" - ], - "id": "66bf795805047a3d", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "filtered_df = filter_tl(full_df, tl)\n", - "preserved_data(filtered_df, full_df)" - ], - "id": "c61f1c95fc05749e", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Visualize environmental data", - "id": "e0ea7247e27a37b6" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "resamp_tot = filtered_df.set_index(\"start_datetime\").resample(frq).first().reset_index()\n", - "\n", - "fig, ax = plt.subplots(figsize=(12, 6))\n", - "ax.plot(resamp_tot[\"start_datetime\"], resamp_tot[\"Angle\"])\n", - "plt.title(\"Angle of the instrument over time\")\n", - "plt.show()" - ], - "id": "14ff46dec308dcc1", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "fig2, ax = plt.subplots(figsize=(12, 6))\n", - "ax.plot(resamp_tot[\"start_datetime\"], resamp_tot[\"Temp\"])\n", - "plt.title(\"Temperature over time\")\n", - "plt.show()" - ], - "id": "db07736375a767d9", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "fig3, ax = plt.subplots(figsize=(12, 6))\n", - "ax.plot(resamp_tot[\"start_datetime\"], resamp_tot[\"%TimeLost\"])\n", - "plt.title(\"%TimeLost over time\")\n", - "plt.show()" - ], - "id": "7409831bc24271e3", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Extract your processed data\n", - "This dataframe is now compatible for analyses on the next notebook and on R." - ], - "id": "c64d09af5a11213d" - }, - { - "metadata": {}, - "cell_type": "code", - "source": "filtered_df.to_csv(r\"U:\\Hours_DPM_FBUZZ_CA4.csv\", index=False) #Name your file. 
🐬", - "id": "f1f9ec385038ba87", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### First visualization\n", - "Precise the coordinates of the location of your listening point" - ], - "id": "a1b31aa6bd8f4d70" - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "", - "id": "3fda0cc8174fa757" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/user_case/example_FPOD-CPOD_firstresults.ipynb b/user_case/example_FPOD-CPOD_firstresults.ipynb deleted file mode 100644 index 1c3366b..0000000 --- a/user_case/example_FPOD-CPOD_firstresults.ipynb +++ /dev/null @@ -1,251 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": { - "collapsed": true - }, - "source": [ - "from pandas import (\n", - " concat,\n", - " read_csv,\n", - " to_datetime,\n", - ")\n", - "\n", - "from post_processing.utils.fpod_utils import extract_site, percent_calc, year_percent, ym_percent, create_matrix,hist_mean_h, hist_mean_m, hist_mean_s\n", - "from post_processing.utils.core_utils import get_season" - ], - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "## Overview\n", - "Import the right dataset." - ], - "id": "36421fdbbca9aed6" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *CALAIS*", - "id": "caea0e065ad8068c" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "ca4 = read_csv(r\"L:\\acoustock\\Bioacoustique\\DATASETS\\CPOD_PROJETS\\CALAIS\\DATA\\DATA_FULL_OTHERCET\\Hours_DPM_FBUZZ_CA4_ssTL.csv\")\n", - "walde = read_csv(r\"L:\\acoustock\\Bioacoustique\\DATASETS\\CPOD_PROJETS\\CALAIS\\DATA\\DATA_FULL_OTHERCET\\Hours_DPM_FBUZZ_Walde_ssTL.csv\")\n", - "\n", - "data_c = concat([ca4, walde])\n", - "data_c[\"start_datetime\"] = to_datetime(data_c[\"start_datetime\"])\n", - "data_c[\"start_datetime\"] = data_c[\"start_datetime\"].apply(lambda x : x.tz_convert(\"Europe/Paris\"))\n", - "data_c[\"Hour\"] = data_c[\"start_datetime\"].dt.hour" - ], - "id": "1268d9e6ce5cdf32", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *CETIROISE*", - "id": "b426e672fdd5c6b8" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "ceti = read_csv(r\"U:\\Hours_DPM_FBUZZ_CETIROISE.csv\")\n", - "\n", - "ceti[\"start_datetime\"] = to_datetime(ceti[\"start_datetime\"])\n", - "ceti[\"start_datetime\"] = ceti[\"start_datetime\"].apply(lambda x : x.tz_convert(\"CET\")) #TimeZone Central European Time\n", - "ceti[\"Hour\"] = ceti[\"start_datetime\"].dt.hour" - ], - "id": "870bc0a014561ba8", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *KERGUELEN*", - "id": "17a5ce1338f6cd1a" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "a = read_csv(r\"U:\\Hours_DPM_FBUZZ_A.csv\")\n", - "b = read_csv(r\"U:\\Hours_DPM_FBUZZ_B.csv\")\n", - "c = read_csv(r\"U:\\Hours_DPM_FBUZZ_C.csv\")\n", - "d = read_csv(r\"U:\\Hours_DPM_FBUZZ_D.csv\")\n", - "\n", - "data_k = concat([a, b, c, 
d])\n", - "data_k[\"start_datetime\"] = to_datetime(data_k[\"start_datetime\"])\n", - "data_k[\"start_datetime\"] = data_k[\"start_datetime\"].apply(lambda x : x.tz_convert(\"Indian/Kerguelen\"))\n", - "data_k[\"Hour\"] = data_k[\"start_datetime\"].dt.hour" - ], - "id": "d65697a1f1487f4c", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### First results\n", - "Precise your dataset." - ], - "id": "9fc3b5075bf7ff2c" - }, - { - "metadata": {}, - "cell_type": "code", - "source": "data = data_c #🐬", - "id": "add4a626d6cc25a4", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "data = extract_site(data)\n", - "data[\"YMH\"] = data[\"Year\"].astype(str) + '-' + data[\"Month\"].astype(str) + '-' + data[\"Hour\"].astype(str)\n", - "y_per = percent_calc(data, \"Year\")\n", - "#t_per = percent_calc(data, \"TRAVAUX\")" - ], - "id": "37ecc80eda8e57ed", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "ym_per = percent_calc(data, \"YM\")\n", - "ym_per[\"YM\"] = to_datetime(ym_per[\"YM\"])\n", - "ym_per[\"Season\"] = ym_per[\"YM\"].apply(lambda x: get_season(x)[0]) #If in the southern hemisphere, write \"get_season(x, northern = False)\".\n", - "ym_per[\"Month\"] = ym_per[\"YM\"].dt.month\n", - "ym_per[\"Year\"] = ym_per[\"YM\"].dt.year" - ], - "id": "2b988869ed2466e1", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "per_h = percent_calc(data, \"YMH\")\n", - "per_h[[\"Y\",\"M\",\"Hour\"]] = per_h[\"YMH\"].str.split(\"-\", expand=True)\n", - "per_h[\"Hour\"] = per_h[\"Hour\"].astype(int)" - ], - "id": "cf704032c4a59a7b", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "matrice_s = create_matrix(ym_per, [\"site.name\"],[\"%DPH\", \"FBR\"])\n", - "matrice_m = create_matrix(ym_per, [\"site.name\", \"Month\"],[\"%click\", \"FBR\"])\n", - "matrice_h = create_matrix(per_h, [\"site.name\", \"Hour\"],[\"%click\", \"FBR\"])" - ], - "id": "caf3f71c6b6f70ca", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "hist_mean_s(\n", - " matrice_s,\n", - " metric_mean=\"%DPH_mean\",\n", - " metric_std=\"%DPH_std\",\n", - " y_lab=\"Moyenne %DPH\",\n", - " title_suffix=\"%DPH\"\n", - ")" - ], - "id": "2ff751ae02e80285", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "year_percent(y_per, \"FBR\")", - "id": "29f2703ab28c5b28", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "ym_percent(ym_per, \"DPM\")", - "id": "a2dacac3caecff5f", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "hist_mean_m(\n", - " matrice_m,\n", - " metric_mean=\"%click_mean\",\n", - " metric_std=\"%click_std\",\n", - " y_lab=\"Moyenne %click\",\n", - " title_suffix=\"%click\"\n", - ")" - ], - "id": "71161e7545bb1414", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "hist_mean_h(\n", - " matrice_h,\n", - " metric_mean=\"FBR_mean\",\n", - " metric_std=\"FBR_std\",\n", - " y_lab=\"Feeding buzz ratio\",\n", - " title_suffix=\"FBR\"\n", - ")" - ], - "id": "5cbea8601bce2172", - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - 
"display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/user_case/example_FPOD-CPOD_raw.ipynb b/user_case/example_FPOD-CPOD_raw.ipynb deleted file mode 100644 index 8412c3b..0000000 --- a/user_case/example_FPOD-CPOD_raw.ipynb +++ /dev/null @@ -1,420 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": { - "collapsed": true, - "ExecuteTime": { - "end_time": "2026-01-20T10:24:33.069494Z", - "start_time": "2026-01-20T10:24:29.180185Z" - } - }, - "source": [ - "from __future__ import annotations\n", - "\n", - "import pytz\n", - "\n", - "from post_processing.utils.fpod_utils import load_pod_folder, pod2aplose, actual_data, filter_by_metadatax, process_feeding_buzz, load_pod_txt, add_utc\n", - "from post_processing.utils.core_utils import json2df\n", - "\n", - "import logging\n", - "from pathlib import Path\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.ticker as ticker\n", - "from pandas import Timestamp, to_datetime\n", - "from pandas.tseries import frequencies\n", - "from post_processing.dataclass.data_aplose import DataAplose\n", - "from post_processing.utils.core_utils import get_season, get_count\n", - "from osekit import setup_logging\n", - "from user_case.config import season_color\n", - "setup_logging(Path(r\"C:\\Users\\dupontma2\\Documents\\Git\\OSmOSE\\OSmOSE_post_processing\\src\\post_processing\\logging_config.yaml\"), logging.ERROR)" - ], - "outputs": [], - "execution_count": 1 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Load data\n", - "🐬 = input to modify \\\n", - "Import your raw FPOD or CPOD data. All files for one site must be stored in the same folder and identified by their respective phases. \\\n", - "You also need to import your metadata file." - ], - "id": "c464f241817a1407" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-20T10:32:32.641354Z", - "start_time": "2026-01-20T10:32:09.650669Z" - } - }, - "cell_type": "code", - "source": [ - "pod_files = Path(r\"U:\\CA4\") #Path to your data folder. 🐬\n", - "path = load_pod_folder(pod_files) #Process all your POD.csv files.\n", - "\n", - "fb_files = Path(r\"U:\\fb_CA4\") #Path to your click details folder. 🐬\n", - "json = Path(r\"C:\\Users\\fouinel\\Downloads\\deployment_calais.json\") #Path to your metadata file. 
🐬\n", - "\n", - "print(path.head())\n", - "df_0 = path.dropna()\n", - "\n", - "metadatax = json2df(json_path=json)\n", - "metadatax[\"deploy.name\"] = (metadatax[\"site.name\"].astype(str) + \"_\" + metadatax[\"campaign.name\"].astype(str))" - ], - "id": "6cf23db3b4288c29", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " File ChunkEnd DPM Nall MinsOn deploy.name\n", - "0 CA4 POD2397 file01.CP3 14/05/2014 07:07 0 0 1 CA4_Phase1\n", - "1 CA4 POD2397 file01.CP3 14/05/2014 07:08 0 8 1 CA4_Phase1\n", - "2 CA4 POD2397 file01.CP3 14/05/2014 07:09 0 4 1 CA4_Phase1\n", - "3 CA4 POD2397 file01.CP3 14/05/2014 07:10 0 251 1 CA4_Phase1\n", - "4 CA4 POD2397 file01.CP3 14/05/2014 07:11 0 4095 1 CA4_Phase1\n" - ] - } - ], - "execution_count": 2 - }, - { - "metadata": {}, - "cell_type": "code", - "source": "d_beg_end = actual_data(df_0, metadatax) #Extract the beginning and end of recording for every phase.", - "id": "fa52f8971b61aaf6", - "outputs": [], - "execution_count": null - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-20T10:32:43.618484Z", - "start_time": "2026-01-20T10:32:43.243995Z" - } - }, - "cell_type": "code", - "source": "df_1 = df_0[df_0[\"DPM\"] !=\"0\" ] #Remove the 0 to lighten the APLOSE file.", - "id": "769e128f2a5293e1", - "outputs": [], - "execution_count": 3 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### APLOSE format\n", - "#### *C-POD*\n", - "Use cpod2aplose if you are managing C-POD data." - ], - "id": "dd03975b7aef7eed" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-20T10:32:50.772619Z", - "start_time": "2026-01-20T10:32:50.060639Z" - } - }, - "cell_type": "code", - "source": [ - "df_aplose = pod2aplose(df_1, pytz.utc, \"CA4\", \"Marsouin\", \"CPOD\") #Precise site name, species and instrument. 🐬\n", - "print(df_aplose.head())" - ], - "id": "4cc867627d677529", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " dataset filename start_time end_time \\\n", - "0 CA4 2014-05-16T02:30:00.000+0000 0 60 \n", - "1 CA4 2014-05-17T03:52:00.000+0000 0 60 \n", - "2 CA4 2014-05-17T04:47:00.000+0000 0 60 \n", - "3 CA4 2014-05-19T17:06:00.000+0000 0 60 \n", - "4 CA4 2014-05-20T11:07:00.000+0000 0 60 \n", - "\n", - " start_frequency end_frequency annotation annotator \\\n", - "0 0 0 Marsouin CPOD \n", - "1 0 0 Marsouin CPOD \n", - "2 0 0 Marsouin CPOD \n", - "3 0 0 Marsouin CPOD \n", - "4 0 0 Marsouin CPOD \n", - "\n", - " start_datetime end_datetime is_box \\\n", - "0 2014-05-16T02:30:00.000+0000 2014-05-16T02:31:00.000+0000 0 \n", - "1 2014-05-17T03:52:00.000+0000 2014-05-17T03:53:00.000+0000 0 \n", - "2 2014-05-17T04:47:00.000+0000 2014-05-17T04:48:00.000+0000 0 \n", - "3 2014-05-19T17:06:00.000+0000 2014-05-19T17:07:00.000+0000 0 \n", - "4 2014-05-20T11:07:00.000+0000 2014-05-20T11:08:00.000+0000 0 \n", - "\n", - " deploy.name \n", - "0 CA4_Phase1 \n", - "1 CA4_Phase1 \n", - "2 CA4_Phase1 \n", - "3 CA4_Phase1 \n", - "4 CA4_Phase1 \n" - ] - } - ], - "execution_count": 4 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Clean your dataset\n", - "Remove useless lines, recorder outside the instrument submersion. Export your file to the aplose format. You can change the name of the file to match the project you are working on." 
- ], - "id": "d2c642658dbfe278" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-20T10:33:14.509811Z", - "start_time": "2026-01-20T10:33:14.070492Z" - } - }, - "cell_type": "code", - "source": "cleared = filter_by_metadatax(df_aplose, metadatax)", - "id": "895bd5a116918285", - "outputs": [], - "execution_count": 5 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2026-01-20T10:33:24.735234Z", - "start_time": "2026-01-20T10:33:24.723966Z" - } - }, - "cell_type": "code", - "source": "cleared = cleared.drop_duplicates(subset=['start_datetime'], keep=\"last\")", - "id": "1a31da7341f3d8c9", - "outputs": [], - "execution_count": 7 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Feeding buzzes processing\n", - "Use \"Dauphin\", Marsouin\" or \"Commerson\" to get different ICI processing." - ], - "id": "4cf0b89a9491884" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "fb_all = load_pod_txt(fb_files) #Read all your FB.txt files.\n", - "fb_all = process_feeding_buzz(fb_all, \"Marsouin\") #Categorize the minutes (positive or not to FB detection). 🐬\n", - "add_utc(fb_all, [\"start_datetime\"], \"min\")" - ], - "id": "64f824e02131d90a", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "dpm_fb = cleared.merge(fb_all[[\"start_datetime\", \"Foraging\"]], on=[\"start_datetime\"], how=\"left\") #Merge DPM and FB dataframes", - "id": "e90f6d91de3f8ce3", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Export", - "id": "d114ed7164cfd0da" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_beg_end.to_csv(r\"U:\\Deb_Fin_CA4.csv\", index=False) #Export the new file. 🐬\n", - "dpm_fb.to_csv(r\"U:\\APLOSE_CA4_pos.csv\", index=False) #Name your file. 🐬" - ], - "id": "9d34e00f4e8147e8", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Explore\n", - "First visualization of the data" - ], - "id": "a85ea092d9fc197c" - }, - { - "metadata": {}, - "cell_type": "code", - "source": "data = DataAplose(dpm_fb)", - "id": "639c474690373895", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "bin_size = frequencies.to_offset(\"1d\")\n", - "ticks = frequencies.to_offset(\"6BMS\")\n", - "fmt = \"%b %y\"" - ], - "id": "cb476b5655bdff42", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "data.lat = 50.973333 #CA4: 51.00035 ; Walde: 50.973333 ; A: -49.38765 ; B: -49.424733 ; C: -49.4677 ; D: -49.47175\n", - "data.lon = 1.8117 #CA4: 1.879667 ; Walde: 1.8117 ; A: 69.9449 ; B: 69.932383 ; C: 70.081067 ; D: 69.836617" - ], - "id": "3fc33f2acf84ea34", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "#### Reshape the data\n", - "Set beginning and end of the chosen window." 
- ], - "id": "2857f26f8c168ad3" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "data.df[\"end_datetime\"] = to_datetime(data.df[\"end_datetime\"])\n", - "data2 = data.reshape(begin=Timestamp(\"2013 11 01\"), end=Timestamp(\"2025 08 01\"))\n", - "tz = pytz.timezone(\"UTC\")\n", - "data2.change_tz(tz)" - ], - "id": "75e00c1920b69409", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### Heatmap", - "id": "cdf3a92dfb6514d6" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "fig, ax = plt.subplots(1, 1)\n", - "ax = data2.set_ax(ax=ax, x_ticks_res=ticks, date_format=fmt)\n", - "data2.plot(\n", - " mode=\"heatmap\",\n", - " annotator=data2.annotators[0],\n", - " label=data2.labels[0],\n", - " ax=ax,\n", - " bin_size=bin_size,\n", - " show_rise_set=True,\n", - ")\n", - "plt.tight_layout()\n", - "plt.show()" - ], - "id": "6e1832101aec4156", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### Detections over time", - "id": "b0f3c6b7fc08a2be" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "fig2, ax = plt.subplots(1, 1)\n", - "ax = data2.set_ax(ax=ax, x_ticks_res=ticks, date_format=fmt)\n", - "data2.plot(\n", - " mode=\"scatter\",\n", - " annotator=data2.annotators[0],\n", - " label=data2.labels[0],\n", - " ax=ax,\n", - " show_rise_set=True,\n", - ")\n", - "plt.tight_layout()\n", - "plt.show()" - ], - "id": "66b43e53fb17037", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### DPM per day", - "id": "c7ea248b277edb65" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "df_filtered = data2.filter_df(data2.annotators[0], data2.labels[0])\n", - "df_counts = get_count(df_filtered, bin_size)\n", - "\n", - "df_counts[\"Season\"] = df_counts.index.to_series().apply(lambda x: get_season(x)[0])\n", - "df_counts[\"colors\"] = df_counts[\"Season\"].map(season_color).fillna(\"gray\")\n", - "\n", - "fig3, ax = plt.subplots(1, 1)\n", - "ax = data2.set_ax(ax=ax, x_ticks_res=ticks, date_format=fmt)\n", - "data2.plot(\n", - " mode=\"histogram\",\n", - " annotator=data2.annotators[0],\n", - " label=data2.labels[0],\n", - " color=df_counts[\"colors\"].tolist(),\n", - " ax=ax,\n", - " bin_size=bin_size,\n", - " legend=True,\n", - ")\n", - "ax.set_ylim(0, 200)\n", - "ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=10))\n", - "plt.tight_layout()\n", - "plt.show()" - ], - "id": "81b0dafa8adc20e9", - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/user_case/resource/CPOD-FPOD_yaml.yml b/user_case/resource/CPOD-FPOD_yaml.yml deleted file mode 100644 index ded85b0..0000000 --- a/user_case/resource/CPOD-FPOD_yaml.yml +++ /dev/null @@ -1,33 +0,0 @@ -#This file is to be used to load an APLOSE result csv file. -#If a parameter is set here to `null`, it will be loaded with a `None` value in the Python scripts. -#For parameter definition, see `sort_detections` function in `utils\def_func`. 
-# -#Note: -# - Several csv files can be loaded at once, to perform this copy the template and paste it at the end of the present file. - -######################################### -'C:\Users\dupontma2\Downloads\data_aplose.csv': - # timebin: detection time resolution in seconds - 'timebin_new': null - # begin datetime: '2000-01-01T00:00:00+0000' - 'begin': null - # end datetime: '2000-01-01T00:00:00+0000' - 'end': null - # annotator: ['annotator1', 'annotator2'] - 'annotator': null - # annotation: ['annotation1', 'annotation2'] - 'annotation': "Marsouin" - # box: boolean value, whether to keep strong annotations - 'box': False - # timestamp file - 'timestamp_file': null - # user selection - 'user_sel': all - # f_min filter - 'f_min': null - # f_max filter - 'f_max': null - # score - 'score': null - # filename format - 'filename_format': '%Y-%m-%dT%H:%M:%S.%f%z' \ No newline at end of file diff --git a/user_case/user_case_CALAIS.ipynb b/user_case/user_case_CALAIS.ipynb deleted file mode 100644 index f0f109a..0000000 --- a/user_case/user_case_CALAIS.ipynb +++ /dev/null @@ -1,581 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": { - "collapsed": true, - "ExecuteTime": { - "end_time": "2026-01-21T13:29:51.583226Z", - "start_time": "2026-01-21T13:29:43.385674Z" - } - }, - "source": [ - "from pathlib import Path\n", - "\n", - "import pytz\n", - "from pandas import (\n", - " concat,\n", - " read_csv,\n", - " to_datetime,\n", - ")\n", - "\n", - "from post_processing.dataclass.data_aplose import DataAplose\n", - "from post_processing.utils.fpod_utils import load_pod_folder, pod2aplose, actual_data, filter_by_metadatax, resample_dpm, load_pod_txt, build_range, extract_site, percent_calc, site_percent, year_percent, ym_percent, month_percent, hour_percent, \\\n", - " process_feeding_buzz\n", - "from post_processing.utils.core_utils import json2df,get_season" - ], - "outputs": [], - "execution_count": 1 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "### Pre-processing\n", - "\n" - ], - "id": "e8e8c57c7f4197fe" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Import your csv files. 
All files for one site must be stored in the same folder.", - "id": "6f9beab2dcba1a9c" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:25:01.549663Z", - "start_time": "2025-10-17T09:24:48.208563Z" - } - }, - "cell_type": "code", - "source": [ - "pod_files = Path(r\"U:\\Walde\")\n", - "path = load_pod_folder(pod_files)\n", - "print(path.head())\n", - "\n", - "df_0 = path.dropna()\n", - "\n", - "json = Path(r\"C:\\Users\\fouinel\\Downloads\\deployment_calais.json\") #Path to your metadata file.\n", - "metadatax = json2df(json_path=json)\n", - "\n", - "metadatax[\"deploy.name\"] = (metadatax[\"site.name\"].astype(str) + \"_\" +\n", - " metadatax[\"campaign.name\"].astype(str))" - ], - "id": "8636a8a27fe2af47", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " File ChunkEnd DPM Nall MinsOn deploy.name\n", - "0 POD2399 file01.CP3 14/05/2014 08:03 0.0 0.0 1.0 Walde_Phase1\n", - "1 POD2399 file01.CP3 14/05/2014 08:04 0.0 799.0 1.0 Walde_Phase1\n", - "2 POD2399 file01.CP3 14/05/2014 08:05 0.0 0.0 1.0 Walde_Phase1\n", - "3 POD2399 file01.CP3 14/05/2014 08:06 0.0 3361.0 1.0 Walde_Phase1\n", - "4 POD2399 file01.CP3 14/05/2014 08:07 0.0 421.0 1.0 Walde_Phase1\n" - ] - } - ], - "execution_count": 11 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:34:43.092415Z", - "start_time": "2025-10-17T09:27:26.409365Z" - } - }, - "cell_type": "code", - "source": "d_beg_end = actual_data(df_0, metadatax)", - "id": "4208969d9e509a8", - "outputs": [], - "execution_count": 12 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:35:32.137350Z", - "start_time": "2025-10-17T09:35:32.089860Z" - } - }, - "cell_type": "code", - "source": "d_beg_end.to_csv(r\"U:\\Deb_Fin_Walde.csv\", index=False)", - "id": "6fb6f4fa675d7cab", - "outputs": [], - "execution_count": 13 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:35:33.725437Z", - "start_time": "2025-10-17T09:35:33.670018Z" - } - }, - "cell_type": "code", - "source": "df_1 = df_0[df_0[\"DPM\"] !=0 ]", - "id": "597efd1d90e3d069", - "outputs": [], - "execution_count": 14 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### APLOSE format", - "id": "4f8c83c96f0b6ff4" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Chose the right function, depending on the instrument you are working with.", - "id": "9849c47189cf1a85" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *CPOD*", - "id": "8ed339c688bdef1" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:35:38.378283Z", - "start_time": "2025-10-17T09:35:35.755020Z" - } - }, - "cell_type": "code", - "source": [ - "df_aplose = pod2aplose(df_1, pytz.utc, \"Walde\", \"Marsouin\", \"CPOD\")\n", - "print(df_aplose.head())" - ], - "id": "812ed7c0c5e258e7", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " dataset filename start_time end_time start_frequency end_frequency \\\n", - "0 Walde 0 60 0 0 \n", - "1 Walde 0 60 0 0 \n", - "2 Walde 0 60 0 0 \n", - "3 Walde 0 60 0 0 \n", - "4 Walde 0 60 0 0 \n", - "\n", - " annotation annotator start_datetime \\\n", - "0 Marsouin FPOD 2014-05-15T16:56:00.000+0000 \n", - "1 Marsouin FPOD 2014-05-17T22:53:00.000+0000 \n", - "2 Marsouin FPOD 2014-05-17T22:54:00.000+0000 \n", - "3 Marsouin FPOD 2014-05-18T14:05:00.000+0000 \n", - "4 Marsouin FPOD 2014-05-21T14:19:00.000+0000 \n", - "\n", - " end_datetime is_box deploy.name \n", - "0 2014-05-15T16:57:00.000+0000 0 
Walde_Phase1 \n", - "1 2014-05-17T22:54:00.000+0000 0 Walde_Phase1 \n", - "2 2014-05-17T22:55:00.000+0000 0 Walde_Phase1 \n", - "3 2014-05-18T14:06:00.000+0000 0 Walde_Phase1 \n", - "4 2014-05-21T14:20:00.000+0000 0 Walde_Phase1 \n" - ] - } - ], - "execution_count": 15 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *FPOD*", - "id": "a39bb10d8ac60a27" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:23:23.552890Z", - "start_time": "2025-10-17T09:23:22.810583Z" - } - }, - "cell_type": "code", - "source": [ - "df_aplose = pod2aplose(df_1, pytz.utc, \"CETIROISE\", \"Marsouin\", \"FPOD\")\n", - "print(df_aplose.head())" - ], - "id": "9b632673397a184", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " dataset filename start_time end_time start_frequency end_frequency \\\n", - "0 CA4 0 60 0 0 \n", - "1 CA4 0 60 0 0 \n", - "2 CA4 0 60 0 0 \n", - "3 CA4 0 60 0 0 \n", - "4 CA4 0 60 0 0 \n", - "\n", - " annotation annotator start_datetime \\\n", - "0 Marsouin FPOD 2014-05-17T03:52:00.000+0000 \n", - "1 Marsouin FPOD 2014-05-17T04:47:00.000+0000 \n", - "2 Marsouin FPOD 2014-05-19T17:06:00.000+0000 \n", - "3 Marsouin FPOD 2014-05-20T11:07:00.000+0000 \n", - "4 Marsouin FPOD 2014-05-20T11:16:00.000+0000 \n", - "\n", - " end_datetime is_box deploy.name \n", - "0 2014-05-17T03:53:00.000+0000 0 CA4_Phase1 \n", - "1 2014-05-17T04:48:00.000+0000 0 CA4_Phase1 \n", - "2 2014-05-19T17:07:00.000+0000 0 CA4_Phase1 \n", - "3 2014-05-20T11:08:00.000+0000 0 CA4_Phase1 \n", - "4 2014-05-20T11:17:00.000+0000 0 CA4_Phase1 \n" - ] - } - ], - "execution_count": 7 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Remove non usable lines", - "id": "7860838f8514da39" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Import the .json file available on metadatax.", - "id": "32f8ff8f9ece35a8" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:35:54.374270Z", - "start_time": "2025-10-17T09:35:54.303578Z" - } - }, - "cell_type": "code", - "source": [ - "\n", - "\n", - "cleared = filter_by_metadatax(df_aplose, metadatax) #Remove lines captures outside the instrument submersion." - ], - "id": "ed6a06c522aea169", - "outputs": [], - "execution_count": 16 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Export your file to the aplose format. 
You can change the name of the file to match the project you are working on.", - "id": "8f5fe75cc3463971" - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-17T09:36:06.597522Z", - "start_time": "2025-10-17T09:35:56.739495Z" - } - }, - "cell_type": "code", - "source": "cleared.to_csv(r\"U:\\APLOSE_Walde_pos.csv\", index=False) #You can stock all DPM for a site in a DataAplose file.", - "id": "76f70cb6c6658ba6", - "outputs": [], - "execution_count": 17 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Load data", - "id": "f5d38266dc9d5273" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Use the yaml file to import your aplose files one at a time.", - "id": "2ce11c6e57f38690" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "yaml_file = Path(r\"resource\\CPOD-FPOD_yaml.yml\")\n", - "data_list = DataAplose.from_yaml(file=yaml_file)\n", - "print(data_list.df.head())" - ], - "id": "6837593897111b0a", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Format choice\n", - "id": "9f93eb863e3e3a9e" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Here you need to choose the format in which you want to visualise the positive detections. This aims to replace the functionality available in CPOD.exe.", - "id": "925d92d3eec065e6" - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "frq = \"D\" #This argument will determine the format in which you want to visualise your data. Use \"D\", \"h\" or \"10min\".", - "id": "256b756d05c08294" - }, - { - "metadata": {}, - "cell_type": "code", - "source": "resamp = resample_dpm(data_list.df, frq=frq, extra_columns=[\"deploy.name\"])", - "id": "6cc79b2aeef076ed", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "### Add the feeding buzzes", - "id": "8375ddbe07ad0aee" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "Import your click details files. All files for one site must be stacked in the same folder.", - "id": "9753f4ba20c7267e" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "fb_files = Path(r\"U:\\fb_fpod_cetiroise_c\") #Path to your click details folder.\n", - "fb_all = load_pod_txt(fb_files)\n", - "fb_all = process_feeding_buzz(fb_all, \"Marsouin\")\n", - "\n", - "fb_all[\"start_datetime\"] = fb_all[\"start_datetime\"].dt.floor(frq)\n", - "fb = fb_all.groupby(\"start_datetime\")[\"Foraging\"].sum().reset_index()\n", - "fb[\"start_datetime\"] = to_datetime(fb[\"start_datetime\"], utc=True)" - ], - "id": "2b19f90c99252ff3", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_tot = resamp.merge(fb, on=\"start_datetime\", how=\"left\")\n", - "\n", - "#This function aims to reindent 0 between the positive detections. 
It will be useful to produce first visualization graphs and use this dataset in R.\n", - "d_hour = build_range(d_beg_end, frq)" - ], - "id": "b00c8f1e2210ea7", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "d_fin = d_hour.merge(d_tot, on=[\"start_datetime\", \"deploy.name\"], how=\"left\")", - "id": "601787cc806226b0", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_fin[[\"DPM\",\"Foraging\"]] = d_fin[[\"DPM\",\"Foraging\"]].fillna(0)\n", - "print(d_fin.head())" - ], - "id": "f93bf1f3ca4f4112", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "## Add time columns", - "id": "c7b1d32ed1c99fb7" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_fin[\"Year\"] = d_fin[\"start_datetime\"].dt.year\n", - "d_fin[\"Month\"] = d_fin[\"start_datetime\"].dt.month\n", - "d_fin['YM'] = d_fin[\"Year\"].astype(str) + '-' + d_fin[\"Month\"].astype(str)\n", - "d_fin['YM'] = to_datetime(d_fin['YM'])\n", - "d_fin[\"Day\"] = d_fin[\"start_datetime\"].dt.day\n", - "d_fin[\"Hour\"] = d_fin[\"start_datetime\"].dt.hour\n", - "\n", - "d_fin[\"FBR\"] = d_fin[\"Foraging\"] / d_fin[\"DPM\"]\n", - "d_fin[\"DPH\"] = (d_fin[\"DPM\"] >0).astype(int)" - ], - "id": "a2261ce5093a3104", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "d_fin[\"FBR\"] = d_fin[\"FBR\"].fillna(0)\n", - "d_fin.to_csv(r\"U:\\Hours_DPM_FBUZZ_CETIROISE.csv\", index=False)" - ], - "id": "d606f4f6904b57c6", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "## Overview", - "id": "4bc0904182a3f845" - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *Import datasets*", - "id": "e1de414e2eb3fa8f" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "ca4 = read_csv(r\"U:\\Hours_DPM_FBUZZ_SiteCA4.csv\")\n", - "walde = read_csv(r\"U:\\Hours_DPM_FBUZZ_SiteWalde.csv\")\n", - "\n", - "data_c = concat([ca4, walde])\n", - "data_c[\"start_datetime\"] = to_datetime(data_c[\"start_datetime\"])\n", - "data_c[\"start_datetime\"] = data_c[\"start_datetime\"].apply(lambda x : x.tz_convert(\"Europe/Paris\"))\n", - "data_c[\"Hour\"] = data_c[\"start_datetime\"].dt.hour" - ], - "id": "9909fbfdcb8e2e78", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "a = read_csv(r\"U:\\Hours_DPM_FBUZZ_SiteA.csv\")\n", - "b = read_csv(r\"U:\\Hours_DPM_FBUZZ_SiteB.csv\")\n", - "c = read_csv(r\"U:\\Hours_DPM_FBUZZ_SiteC.csv\")\n", - "d = read_csv(r\"U:\\Hours_DPM_FBUZZ_SiteD.csv\")\n", - "\n", - "data_k = concat([a, b, c, d])\n", - "data_k[\"start_datetime\"] = to_datetime(data_k[\"start_datetime\"])\n", - "data_k[\"start_datetime\"] = data_k[\"start_datetime\"].apply(lambda x : x.tz_convert(\"Indian/Kerguelen\"))\n", - "data_k[\"Hour\"] = data_k[\"start_datetime\"].dt.hour" - ], - "id": "87e2d1938787aefc", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "ceti = read_csv(r\"U:\\Hours_DPM_FBUZZ_CETIROISE.csv\")\n", - "\n", - "ceti[\"start_datetime\"] = to_datetime(ceti[\"start_datetime\"])\n", - "ceti[\"start_datetime\"] = ceti[\"start_datetime\"].apply(lambda x : x.tz_convert(\"CET\")) #TimeZone Central European Time\n", - "ceti[\"Hour\"] = ceti[\"start_datetime\"].dt.hour" - ], - "id": "5928770d1c47bcad", - "outputs": [], 
- "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "#### *Generate graphs*", - "id": "200273fc36fb7d5d" - }, - { - "metadata": {}, - "cell_type": "code", - "source": "data = ceti #Precise which dataset you are working with", - "id": "be10e9d690294cff", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "data = extract_site(data)\n", - "y_per = percent_calc(data, \"Year\")\n", - "ym_per = percent_calc(data, \"YM\")\n", - "ym_per[\"YM\"] = to_datetime(ym_per[\"YM\"])\n", - "ym_per[\"Season\"] = ym_per[\"YM\"].apply(lambda x: get_season(x)[0])\n", - "m_per = percent_calc(data, \"Month\")\n", - "h_per = percent_calc(data, \"Hour\")\n", - "s_per = percent_calc(data)" - ], - "id": "2826b79097a85607", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "site_percent(s_per, \"%click\")", - "id": "ddd1fac6295136c6", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "year_percent(y_per, \"%click\")", - "id": "ba7581e97fdbd07c", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "ym_percent(ym_per, \"%click\")", - "id": "4de618933c154f86", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "month_percent(m_per, \"%buzzes\")", - "id": "7cf84c8744185424", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": "hour_percent(h_per, \"%click\")", - "id": "12d83e9082d711c0", - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 67108127f51c196a7603ff022dd584ad8e1d6dc2 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:20:56 +0100 Subject: [PATCH 33/33] refacto --- src/post_processing/utils/filtering_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index a9e0619..3d4832b 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -485,8 +485,8 @@ def _process_annotator_label_pair( time_vector[i] for i, detected in enumerate(detect_vec) if detected ] file_vector = [ - # filename_vector[i] for i, detected in enumerate(detect_vec) if detected - filename_vector[i + 1] for i, detected in enumerate(detect_vec) if detected + filename_vector[i] for i, detected in enumerate(detect_vec) if detected + # filename_vector[i + 1] for i, detected in enumerate(detect_vec) if detected ] if not start_datetime:
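
Note on the final hunk (PATCH 33/33): a minimal sketch of the indexing it reverts to, assuming `detect_vec`, `time_vector`, and `filename_vector` are index-aligned vectors of equal length. That alignment, and the sample values below, are assumptions drawn from this hunk alone, not from the rest of the module.

# Hedged sketch, not the project's implementation: assumes the three vectors
# are index-aligned, so filename_vector[i] is the audio file containing bin i.
time_vector = ["2014-05-14T07:07", "2014-05-14T07:08", "2014-05-14T07:09"]
filename_vector = ["file01.wav", "file02.wav", "file03.wav"]
detect_vec = [False, True, True]

# Keep only the bins flagged as detections, pairing each kept timestamp
# with the file at the same index.
start_datetime = [t for t, detected in zip(time_vector, detect_vec) if detected]
file_vector = [f for f, detected in zip(filename_vector, detect_vec) if detected]

assert start_datetime == ["2014-05-14T07:08", "2014-05-14T07:09"]
assert file_vector == ["file02.wav", "file03.wav"]

# Under this alignment, the previous filename_vector[i + 1] indexing would map
# each detection to the following file and raise IndexError for a detection in
# the last bin, which is consistent with reverting to filename_vector[i].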