Skip to content

Commit b0bbee6

Browse files
committed
initiate fpod_utils tests
1 parent e505033 commit b0bbee6

1 file changed

Lines changed: 316 additions & 1 deletion

File tree

tests/test_fpod_utils.py

Lines changed: 316 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""FPOD/ CPOD processing functions tests."""
22
import io
3+
import pytz
34
from datetime import datetime
45
from pathlib import Path
56

67
import pytest
78
from osekit.utils.timestamp_utils import strptime_from_text
8-
from pandas import DataFrame, Timestamp, read_csv, concat
9+
from pandas import DataFrame, Timestamp, Timedelta, read_csv, concat
910
from pandas.testing import assert_frame_equal
1011

1112
from post_processing.utils.fpod_utils import (
@@ -164,8 +165,322 @@ def test_csv_folder_single_file(tmp_path) -> None:
164165
assert all(result["deploy.name"] == "data")
165166
assert list(result.columns) == ["col1", "col2", "deploy.name"]
166167

168+
167169
# pod2aplose
170+
@pytest.fixture
def sample_df():
    """Build a three-row POD table with unsorted ``ChunkEnd`` timestamps.

    The rows span two deployments and are deliberately not in chronological
    order, so tests can observe any reordering done by the code under test.
    """
    chunk_ends = ["15/01/2024 10:30", "15/01/2024 11:00", "15/01/2024 09:45"]
    deployments = ["deploy1", "deploy2", "deploy1"]
    return DataFrame({"ChunkEnd": chunk_ends, "deploy.name": deployments})
177+
178+
179+
@pytest.fixture
def timezone():
    """Provide the UTC timezone object shared by the pod2aplose tests."""
    utc = pytz.UTC
    return utc
183+
184+
185+
def test_pod2aplose_basic_structure(sample_df, timezone):
    """Check the output type, the exact column order and the row count."""
    call_kwargs = {
        "df": sample_df,
        "tz": timezone,
        "dataset_name": "test_dataset",
        "annotation": "test_annotation",
        "annotator": "test_annotator",
    }
    converted = pod2aplose(**call_kwargs)

    # Column order matters: the comparison below is on an ordered list.
    wanted_columns = [
        "dataset",
        "filename",
        "start_time",
        "end_time",
        "start_frequency",
        "end_frequency",
        "annotation",
        "annotator",
        "start_datetime",
        "end_datetime",
        "is_box",
        "deploy.name",
    ]

    assert isinstance(converted, DataFrame)
    assert converted.columns.tolist() == wanted_columns
    assert len(converted) == len(sample_df)
213+
214+
215+
def test_pod2aplose_dataset_propagation(sample_df, timezone):
    """Every output row must carry the requested dataset name."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="my_dataset",
        annotation="click",
        annotator="john",
    )

    matches = converted["dataset"] == "my_dataset"
    assert matches.all()
226+
227+
228+
def test_pod2aplose_annotation_propagation(sample_df, timezone):
    """Every output row must carry the requested annotation label."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="porpoise_click",
        annotator="john",
    )

    matches = converted["annotation"] == "porpoise_click"
    assert matches.all()
239+
240+
241+
def test_pod2aplose_annotator_propagation(sample_df, timezone):
    """Every output row must carry the requested annotator name."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="alice",
    )

    matches = converted["annotator"] == "alice"
    assert matches.all()
252+
253+
254+
def test_pod2aplose_default_bin_size(sample_df, timezone):
    """Without ``bin_size``, rows span 0 to 60 seconds."""
    # bin_size is intentionally omitted so the default is exercised.
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    assert (converted["start_time"] == 0).all()
    assert (converted["end_time"] == 60).all()
266+
267+
268+
def test_pod2aplose_custom_bin_size(sample_df, timezone):
    """An explicit ``bin_size`` of 120 must appear as ``end_time``."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
        bin_size=120,
    )

    assert (converted["start_time"] == 0).all()
    assert (converted["end_time"] == 120).all()
281+
282+
283+
def test_pod2aplose_frequency_values(sample_df, timezone):
    """Both frequency columns must be 0 on every row."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    assert (converted["start_frequency"] == 0).all()
    assert (converted["end_frequency"] == 0).all()
295+
296+
297+
def test_pod2aplose_is_box_values(sample_df, timezone):
    """The ``is_box`` column must be 0 on every row."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    box_flags = converted["is_box"]
    assert (box_flags == 0).all()
308+
309+
310+
def test_pod2aplose_deploy_name_preserved(sample_df, timezone):
    """The ``deploy.name`` column must survive with the same set of values."""
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    # Row order may differ from the input, so only presence, count and the
    # set of distinct values are compared.
    assert "deploy.name" in converted.columns
    deploy_names = converted["deploy.name"]
    assert len(deploy_names) == len(sample_df)
    assert set(deploy_names) == {"deploy1", "deploy2"}
324+
325+
326+
def test_pod2aplose_sorting_by_datetime(timezone):
    """Output rows must be ordered chronologically by ``ChunkEnd``."""
    unsorted_input = DataFrame(
        {
            "ChunkEnd": ["15/01/2024 12:00", "15/01/2024 10:00", "15/01/2024 11:00"],
            "deploy.name": ["d1", "d2", "d3"],
        },
    )

    converted = pod2aplose(
        df=unsorted_input,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    # Each deploy.name is unique per row, so it reveals the resulting order:
    # 10:00 (d2), then 11:00 (d3), then 12:00 (d1).
    observed_order = list(converted["deploy.name"])
    assert observed_order == ["d2", "d3", "d1"]
339+
340+
341+
def test_pod2aplose_datetime_formatting():
    """Datetime and filename fields must be non-empty for a single row."""
    single_row = DataFrame({"ChunkEnd": ["01/02/2024 14:30"], "deploy.name": ["deploy1"]})

    converted = pod2aplose(
        df=single_row,
        tz=pytz.UTC,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
        bin_size=60,
    )

    # NOTE(review): only non-emptiness is checked here; the exact string
    # format is not asserted.
    first = converted.iloc[0]
    assert len(first["start_datetime"]) > 0
    assert len(first["end_datetime"]) > 0
    assert len(first["filename"]) > 0
358+
359+
360+
def test_pod2aplose_end_datetime_offset(timezone):
    """With a non-zero ``bin_size``, start and end datetimes must differ."""
    single_row = DataFrame({"ChunkEnd": ["15/01/2024 10:00"], "deploy.name": ["deploy1"]})

    converted = pod2aplose(
        df=single_row,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
        bin_size=120,
    )

    # NOTE(review): this only asserts the two values are not equal; the size
    # of the offset (bin_size seconds) is not verified.
    start_value = converted["start_datetime"].iloc[0]
    end_value = converted["end_datetime"].iloc[0]
    assert start_value != end_value
375+
376+
377+
def test_pod2aplose_different_timezones():
    """Smoke-test the conversion with a non-UTC timezone (Europe/Paris)."""
    single_row = DataFrame({"ChunkEnd": ["15/01/2024 10:00"], "deploy.name": ["deploy1"]})
    paris = pytz.timezone("Europe/Paris")

    converted = pod2aplose(
        df=single_row,
        tz=paris,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    # Only the row count and dataset name are checked; timestamps are not.
    assert len(converted) == 1
    assert converted["dataset"].iloc[0] == "dataset"
389+
390+
391+
def test_pod2aplose_empty_dataframe(timezone):
    """An empty input must give an empty output with the full column set."""
    no_rows = DataFrame({"ChunkEnd": [], "deploy.name": []})

    converted = pod2aplose(
        df=no_rows,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    wanted_columns = [
        "dataset",
        "filename",
        "start_time",
        "end_time",
        "start_frequency",
        "end_frequency",
        "annotation",
        "annotator",
        "start_datetime",
        "end_datetime",
        "is_box",
        "deploy.name",
    ]

    assert len(converted) == 0
    assert converted.columns.tolist() == wanted_columns
414+
415+
416+
def test_pod2aplose_single_row(timezone):
    """A one-row input must give one row with its deployment and bin size."""
    single_row = DataFrame(
        {"ChunkEnd": ["20/03/2024 15:45"], "deploy.name": ["single_deploy"]},
    )

    converted = pod2aplose(
        df=single_row,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
        bin_size=90,
    )

    assert len(converted) == 1
    only_row = converted.iloc[0]
    assert only_row["deploy.name"] == "single_deploy"
    assert only_row["end_time"] == 90
432+
433+
434+
def test_pod2aplose_does_not_modify_original(sample_df, timezone):
    """Test that the original DataFrame is not modified.

    The input is snapshotted before the call and compared afterwards, so that
    in-place changes to cell values, dtypes, row order or index are detected
    in addition to added/removed columns or rows.
    """
    original_columns = sample_df.columns.tolist()
    original_len = len(sample_df)
    # Deep copy so the snapshot cannot share data with the fixture frame.
    snapshot = sample_df.copy(deep=True)

    pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    # Original DataFrame should be unchanged: cheap structural checks first.
    assert sample_df.columns.tolist() == original_columns
    assert len(sample_df) == original_len
    assert "_temp_dt" not in sample_df.columns
    # Full comparison: values, dtypes, index and column order.
    assert_frame_equal(sample_df, snapshot)
451+
452+
453+
def test_pod2aplose_large_bin_size(sample_df, timezone):
    """A one-hour ``bin_size`` must be reported as ``end_time`` on all rows."""
    one_hour = 3600  # seconds
    converted = pod2aplose(
        df=sample_df,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
        bin_size=one_hour,
    )

    assert (converted["end_time"] == 3600).all()
465+
466+
467+
def test_pod2aplose_index_reset(timezone):
    """The output index must be 0..n-1 even when the input is out of order."""
    reversed_input = DataFrame(
        {
            "ChunkEnd": ["15/01/2024 12:00", "15/01/2024 10:00"],
            "deploy.name": ["d1", "d2"],
        },
    )

    converted = pod2aplose(
        df=reversed_input,
        tz=timezone,
        dataset_name="dataset",
        annotation="click",
        annotator="john",
    )

    # The input rows are in reverse chronological order; the output index is
    # still expected to be exactly [0, 1].
    assert list(converted.index) == [0, 1]
169484

170485
# meta_cut_aplose
171486

0 commit comments

Comments
 (0)