From e328722c90b558f92ec1b3f331860ebd6325f2f4 Mon Sep 17 00:00:00 2001
From: TQ Zhang <tianqiz04@gmail.com>
Date: Fri, 10 Jan 2025 18:57:36 -0800
Subject: [PATCH 1/5] feat: Add support for nested file structures

Recursively searches nested folders for audio files to process. Extension and files to skip can be specified.
---
 PyHa/IsoAutio.py | 89 ++++++++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 33 deletions(-)

diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index a612e6b..1bdf846 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -10,6 +10,7 @@
 from .template_matching.utils import filter, butter_bandpass, generate_specgram, template_matching_local_score_arr
 
 import os
+import glob
 import torch
 import librosa
 import pandas as pd
@@ -155,6 +156,20 @@ def write_confidence(local_score_arr, automated_labels_df):
     automated_labels_df["CONFIDENCE"] = confidences
     return automated_labels_df
 
+def get_files(root_dir, ignore=[], extension=".wav"):
+    """ Recursively searches through folders and subfolders, top-down, from the 
+    given root directory for files to process.
+
+    Args:
+        root_dir (str): path to the root directory
+        ignore (list/iterable): paths to files to ignore, e.g. a template
+        extension (str): file extension of files to look for, e.g. ".wav"
+    """
+    root_dir = os.path.abspath(root_dir)
+    audio_files = glob.glob(f"{root_dir}/**/*{extension}", recursive=True)
+    audio_files = [file for file in audio_files if file not in ignore]
+    return audio_files
+        
 
 def isolate(
         local_scores,
@@ -962,7 +977,7 @@ def generate_automated_labels_microfaune(
     # init labels dataframe
     annotations = pd.DataFrame()
     # generate local scores for every bird file in chosen directory
-    for audio_file in os.listdir(audio_dir):
+    for audio_file in get_files(audio_dir):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -976,7 +991,7 @@ def generate_automated_labels_microfaune(
         except KeyboardInterrupt:
             exit("Keyboard interrupt")
         except BaseException:
-            checkVerbose("Failed to load" + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to load {audio_file}", isolation_parameters)
             continue
 
         # downsample the audio if the sample rate isn't 44.1 kHz
@@ -991,7 +1006,7 @@ def generate_automated_labels_microfaune(
         except KeyboardInterrupt:
             exit("Keyboard interrupt")
         except:
-            checkVerbose("Failed to Downsample" + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to Downsample {audio_file}", isolation_parameters)
             # resample produces unreadable float32 array so convert back
             # SIGNAL = np.asarray(SIGNAL, dtype=np.int16)
             
@@ -1008,7 +1023,7 @@ def generate_automated_labels_microfaune(
             exit("Keyboard interrupt")
         except BaseException as e:
             checkVerbose(e, isolation_parameters)
-            checkVerbose("Error in detection, skipping" + audio_file, isolation_parameters)
+            checkVerbose(f"Error in detection, skipping {audio_file}", isolation_parameters)
             continue
         
             
@@ -1018,13 +1033,14 @@ def generate_automated_labels_microfaune(
         try:
             # Running moment to moment algorithm and appending to a master
             # dataframe.
+            filename = os.path.split(audio_file)[-1]
             new_entry = isolate(
-                local_scores[0],
-                SIGNAL,
-                SAMPLE_RATE,
-                audio_dir,
-                audio_file,
-                isolation_parameters,
+                local_scores=local_scores[0],
+                SIGNAL=SIGNAL,
+                SAMPLE_RATE=SAMPLE_RATE,
+                audio_dir=audio_dir,
+                filename=filename,
+                isolation_parameters=isolation_parameters,
                 manual_id=manual_id,
                 normalize_local_scores=normalize_local_scores)
             # print(new_entry)
@@ -1036,7 +1052,7 @@ def generate_automated_labels_microfaune(
             exit("Keyboard interrupt")
         except BaseException as e:
             checkVerbose(e, isolation_parameters)
-            checkVerbose("Error in isolating bird calls from" + audio_file, isolation_parameters)
+            checkVerbose(f"Error in isolating bird calls from {audio_file}", isolation_parameters)
 
             continue
     # Quick fix to indexing
@@ -1100,7 +1116,7 @@ def generate_automated_labels_tweetynet(
     # init labels dataframe
     annotations = pd.DataFrame()
     # generate local scores for every bird file in chosen directory
-    for audio_file in os.listdir(audio_dir):
+    for audio_file in get_files(audio_dir):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1115,7 +1131,7 @@ def generate_automated_labels_tweetynet(
             exit("Keyboard interrupt")
         except Exception as exp:
             logger.exception(f"Failed to load {audio_file}, parameters: {isolation_parameters}")
-            checkVerbose("Failed to load " + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to load {audio_file}", isolation_parameters)
             continue
             
         # Resample the audio if it isn't the normalized sample rate
@@ -1150,22 +1166,23 @@ def generate_automated_labels_tweetynet(
         try:
             # Running moment to moment algorithm and appending to a master
             # dataframe. 
+            filename = os.path.split(audio_file)[-1]
             if isolation_parameters["tweety_output"]:
                 new_entry = predictions_to_kaleidoscope(
                     predictions, 
                     SIGNAL, 
                     audio_dir, 
-                    audio_file, 
+                    filename, 
                     manual_id, 
                     SAMPLE_RATE)
             else:
                 new_entry = isolate(
-                    local_scores[0],
-                    SIGNAL,
-                    SAMPLE_RATE,
-                    audio_dir,
-                    audio_file,
-                    isolation_parameters,
+                    local_scores=local_scores[0],
+                    SIGNAL=SIGNAL,
+                    SAMPLE_RATE=SAMPLE_RATE,
+                    audio_dir=audio_dir,
+                    filename=filename,
+                    isolation_parameters=isolation_parameters,
                     manual_id=manual_id,
                     normalize_local_scores=normalize_local_scores)
             # print(new_entry)
@@ -1244,7 +1261,7 @@ def generate_automated_labels_FG_BG_separation(
     annotations = pd.DataFrame()
 
     # looping through the folder
-    for audio_file in os.listdir(audio_dir):
+    for audio_file in get_files(audio_dir):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1254,7 +1271,7 @@ def generate_automated_labels_FG_BG_separation(
         except KeyboardInterrupt:
             exit("Keyboard Interrupt")
         except BaseException:
-            checkVerbose("Failed to load " + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to load {audio_file}", isolation_parameters)
             continue
         
         # generating local score array from clip
@@ -1265,17 +1282,18 @@ def generate_automated_labels_FG_BG_separation(
         except KeyboardInterrupt:
             exit("Keyboard Interrupt")
         except BaseException:
-            checkVerbose("Failed to collect local score array of " + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to collect local score array of {audio_file}", isolation_parameters)
             continue
 
         # passing through isolation technique
         try:
+            filename = os.path.split(audio_file)[-1]
             new_entry = isolate(
                 local_score_arr,
                 SIGNAL,
                 SAMPLE_RATE,
                 audio_dir,
-                audio_file,
+                filename,
                 isolation_parameters,
                 manual_id=manual_id,
             )
@@ -1287,7 +1305,7 @@ def generate_automated_labels_FG_BG_separation(
             exit("Keyboard Interrupt")
         except BaseException as e:
             checkVerbose(e, isolation_parameters)
-            checkVerbose("Error in isolating bird calls from " + audio_file, isolation_parameters)
+            checkVerbose(f"Error in isolating bird calls from {audio_file}", isolation_parameters)
             continue
 
     annotations.reset_index(inplace=True, drop=True)
@@ -1358,11 +1376,13 @@ def generate_automated_labels_template_matching(
     except KeyboardInterrupt:
         exit("Keyboard Interrupt")
     except BaseException:
-        checkVerbose("Failed to load and process template " + isolation_parameters["template_path"], isolation_parameters)
+        temp_path = isolation_parameters["template_path"]
+        checkVerbose(f"Failed to load and process template {temp_path}", isolation_parameters)
         exit("Can't do template matching without a template")
 
     # looping through the clips to process
-    for audio_file in os.listdir(audio_dir):
+    ignore = [os.path.abspath(isolation_parameters["template_path"])]
+    for audio_file in get_files(audio_dir, ignore=ignore):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1374,7 +1394,7 @@ def generate_automated_labels_template_matching(
         except KeyboardInterrupt:
             exit("Keyboard Interrupt")
         except BaseException:
-            checkVerbose("Failed to load " + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to load {audio_file}", isolation_parameters)
             continue
         
         # generating local score array from clip
@@ -1383,17 +1403,18 @@ def generate_automated_labels_template_matching(
         except KeyboardInterrupt:
             exit("Keyboard Interrupt")
         except BaseException:
-            checkVerbose("Failed to collect local score array of " + audio_file, isolation_parameters)
+            checkVerbose(f"Failed to collect local score array of {audio_file}", isolation_parameters)
             continue
 
         # passing through isolation technique
         try:
+            filename = os.path.split(audio_file)[-1]
             new_entry = isolate(
                 local_score_arr,
                 SIGNAL,
                 SAMPLE_RATE,
                 audio_dir,
-                audio_file,
+                filename,
                 isolation_parameters,
                 manual_id=manual_id,
             )
@@ -1405,7 +1426,7 @@ def generate_automated_labels_template_matching(
             exit("Keyboard Interrupt")
         except BaseException as e:
             checkVerbose(e, isolation_parameters)
-            checkVerbose("Error in isolating bird calls from " + audio_file, isolation_parameters)
+            checkVerbose(f"Error in isolating bird calls from {audio_file}", isolation_parameters)
             continue
 
     annotations.reset_index(inplace=True, drop=True)
@@ -1458,6 +1479,7 @@ def generate_automated_labels(
     assert isinstance(normalize_local_scores,bool)
 
     #try:
+    
     if(isolation_parameters["model"] == 'microfaune'):
         annotations = generate_automated_labels_microfaune(
                         audio_dir=audio_dir,
@@ -1502,8 +1524,9 @@ def generate_automated_labels(
     else:
         # print("{model_name} model does not exist"\
         #     .format(model_name=isolation_parameters["model"]))
-        checkVerbose("{model_name} model does not exist"\
-        .format(model_name=isolation_parameters["model"]), isolation_parameters)
+        model_type = isolation_parameters["model"]
+        checkVerbose(f"{model_type} model does not exist",
+                     isolation_parameters)
         annotations = None
     # except:
     #     print("Error. Check your isolation_parameters")

From 532646522fe01d10886fd558b9901cfa7945dfe9 Mon Sep 17 00:00:00 2001
From: TQ Zhang <tianqiz04@gmail.com>
Date: Fri, 10 Jan 2025 19:13:13 -0800
Subject: [PATCH 2/5] fix: allow ignored files/file extension to be chosen by
 generate_automated_labels

---
 PyHa/IsoAutio.py | 86 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index 1bdf846..7417fb5 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -164,6 +164,8 @@ def get_files(root_dir, ignore=[], extension=".wav"):
         root_dir (str): path to the root directory
         ignore (list/iterable): paths to files to ignore, e.g. a template
         extension (str): file extension of files to look for, e.g. ".wav"
+    Returns:
+        list of absolute paths to all files with given extension.
     """
     root_dir = os.path.abspath(root_dir)
     audio_files = glob.glob(f"{root_dir}/**/*{extension}", recursive=True)
@@ -852,7 +854,9 @@ def chunk_isolate(
     return pd.DataFrame.from_dict(entry)
 
 
-def generate_automated_labels_birdnet(audio_dir, isolation_parameters):
+def generate_automated_labels_birdnet(
+        audio_dir, 
+        isolation_parameters):
     """
     Function that generates the bird labels for an audio file or across a
     folder using the BirdNet-Lite model
@@ -928,7 +932,9 @@ def generate_automated_labels_microfaune(
         manual_id="bird",
         weight_path=None,
         normalized_sample_rate=44100,
-        normalize_local_scores=False):
+        normalize_local_scores=False,
+        filetype=".wav",
+        ignored_files=[]):
     """
     Function that applies isolation technique on the local scores generated
     by the Microfaune mode across a folder of audio clips. It is determined
@@ -951,6 +957,13 @@ def generate_automated_labels_microfaune(
 
         normalized_sample_rate (int)
             - Sampling rate that the audio files should all be normalized to.
+        
+        filetype (str)
+            - filetype to process
+        
+        ignored_files (list)
+            - absolute paths to files to ignore
+        
 
     Returns:
         Dataframe of automated labels for the audio clips in audio_dir.
@@ -977,7 +990,7 @@ def generate_automated_labels_microfaune(
     # init labels dataframe
     annotations = pd.DataFrame()
     # generate local scores for every bird file in chosen directory
-    for audio_file in get_files(audio_dir):
+    for audio_file in get_files(audio_dir, extension=filetype, ignore=ignored_files):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1065,7 +1078,9 @@ def generate_automated_labels_tweetynet(
         manual_id="bird",
         weight_path=None,
         normalized_sample_rate=44100,
-        normalize_local_scores=False):
+        normalize_local_scores=False,
+        filetype=".wav",
+        ignored_files=[]):
     """
     Function that applies isolation technique determined by
     isolation_parameters dictionary across a folder of audio clips.
@@ -1094,6 +1109,12 @@ def generate_automated_labels_tweetynet(
 
         normalize_local_scores (bool) # may want to incorporate into isolation parameters
             - Flag to normalize the local scores.
+            
+        filetype (str)
+            - filetype to process
+        
+        ignored_files (list)
+            - absolute paths to files to ignore
 
     Returns:
         Dataframe of automated labels for the audio clips in audio_dir.
@@ -1116,7 +1137,7 @@ def generate_automated_labels_tweetynet(
     # init labels dataframe
     annotations = pd.DataFrame()
     # generate local scores for every bird file in chosen directory
-    for audio_file in get_files(audio_dir):
+    for audio_file in get_files(audio_dir, extension=filetype, ignore=ignored_files):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1206,7 +1227,9 @@ def generate_automated_labels_FG_BG_separation(
         audio_dir,
         isolation_parameters,
         manual_id="foreground",
-        normalized_sample_rate=44100):
+        normalized_sample_rate=44100,
+        filetype=".wav",
+        ignored_files=[]):
     """
     Function that reverse-engineers the approach to foreground-background separation deployed by BirdNET:
     https://www.sciencedirect.com/science/article/pii/S1574954121000273
@@ -1245,6 +1268,12 @@ def generate_automated_labels_FG_BG_separation(
 
         normalized_sample_rate (int)
             - Sampling rate that the audio files should all be normalized to.
+            
+        filetype (str)
+            - filetype to process
+        
+        ignored_files (list)
+            - absolute paths to files to ignore
 
     Returns:
         Dataframe of automated labels for the audio clips in audio_dir.
@@ -1261,7 +1290,7 @@ def generate_automated_labels_FG_BG_separation(
     annotations = pd.DataFrame()
 
     # looping through the folder
-    for audio_file in get_files(audio_dir):
+    for audio_file in get_files(audio_dir, extension=filetype, ignore=ignored_files):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1315,7 +1344,9 @@ def generate_automated_labels_template_matching(
         audio_dir,
         isolation_parameters,
         manual_id="template",
-        normalized_sample_rate=44100):
+        normalized_sample_rate=44100,
+        filetype=".wav",
+        ignored_files=[]):
     """
 
 
@@ -1332,6 +1363,12 @@ def generate_automated_labels_template_matching(
 
         normalized_sample_rate (int)
             - Sampling rate that the audio files should all be normalized to.
+            
+        filetype (str)
+            - filetype to process
+        
+        ignored_files (list)
+            - absolute paths to files to ignore
 
     Returns:
         Dataframe of automated labels for the audio clips in audio_dir.
@@ -1381,8 +1418,8 @@ def generate_automated_labels_template_matching(
         exit("Can't do template matching without a template")
 
     # looping through the clips to process
-    ignore = [os.path.abspath(isolation_parameters["template_path"])]
-    for audio_file in get_files(audio_dir, ignore=ignore):
+    ignored_files.append(os.path.abspath(isolation_parameters["template_path"]))
+    for audio_file in get_files(audio_dir, extension=filetype, ignore=ignored_files):
         # skip directories
         if os.path.isdir(os.path.join(audio_dir, audio_file)):
             continue
@@ -1439,7 +1476,9 @@ def generate_automated_labels(
         manual_id="bird",
         weight_path=None,
         normalized_sample_rate=44100,
-        normalize_local_scores=False):
+        normalize_local_scores=False,
+        filetype=".wav",
+        ignored_files=[]):
     """
     Function that generates the bird labels across a folder of audio clips
     given the isolation_parameters
@@ -1465,6 +1504,12 @@ def generate_automated_labels(
         
         normalize_local_scores (bool)
             - Set whether or not to normalize the local scores.
+            
+        filetype (str)
+            - filetype to process
+        
+        ignored_files (list)
+            - absolute paths to files to ignore
 
     Returns:
         Dataframe of automated labels for the audio clips in audio_dir.
@@ -1487,7 +1532,9 @@ def generate_automated_labels(
                         manual_id=manual_id,
                         weight_path=weight_path,
                         normalized_sample_rate=normalized_sample_rate,
-                        normalize_local_scores=normalize_local_scores)
+                        normalize_local_scores=normalize_local_scores,
+                        filetype=filetype,
+                        ignored_files=ignored_files)
     elif(isolation_parameters["model"] == 'birdnet'):
         # We need to delete the some keys from the isolation_parameters
         # because we are unpacking the other arguments
@@ -1498,7 +1545,8 @@ def generate_automated_labels(
             if key in birdnet_parameters.keys():
                 birdnet_parameters.pop(key, None)
         annotations = generate_automated_labels_birdnet(
-                        audio_dir, birdnet_parameters)
+                        audio_dir,
+                        birdnet_parameters)
     elif(isolation_parameters['model'] == 'tweetynet'):
         annotations = generate_automated_labels_tweetynet(
                         audio_dir=audio_dir,
@@ -1506,20 +1554,26 @@ def generate_automated_labels(
                         manual_id=manual_id,
                         weight_path=weight_path,
                         normalized_sample_rate=normalized_sample_rate,
-                        normalize_local_scores=normalize_local_scores)
+                        normalize_local_scores=normalize_local_scores,
+                        filetype=filetype,
+                        ignored_files=ignored_files)
     elif(isolation_parameters["model"]=='fg_bg_dsp_sep'):
         annotations = generate_automated_labels_FG_BG_separation(
            audio_dir=audio_dir,
            isolation_parameters=isolation_parameters,
            manual_id=manual_id,
-           normalized_sample_rate=normalized_sample_rate
+           normalized_sample_rate=normalized_sample_rate,
+           filetype=filetype,
+           ignored_files=ignored_files
         )
     elif (isolation_parameters["model"]=="template_matching"):
         annotations = generate_automated_labels_template_matching(
             audio_dir=audio_dir,
             isolation_parameters=isolation_parameters,
             manual_id=manual_id,
-            normalized_sample_rate=normalized_sample_rate
+            normalized_sample_rate=normalized_sample_rate,
+            filetype=filetype,
+            ignored_files=ignored_files
         )
     else:
         # print("{model_name} model does not exist"\

From 462f0137ab9adcc4fc7e4331c0cff99f67070814 Mon Sep 17 00:00:00 2001
From: TQ Zhang <tianqiz04@gmail.com>
Date: Sun, 12 Jan 2025 22:03:05 -0800
Subject: [PATCH 3/5] fix: Change eager list comprehension to lazy evaluation

also cast ignored list to set to reduce time complexity
---
 PyHa/IsoAutio.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index 7417fb5..6535fa3 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -165,11 +165,13 @@ def get_files(root_dir, ignore=[], extension=".wav"):
         ignore (list/iterable): paths to files to ignore, e.g. a template
         extension (str): file extension of files to look for, e.g. ".wav"
     Returns:
-        list of absolute paths to all files with given extension.
+        generator returning absolute paths to all files with given extension.
     """
     root_dir = os.path.abspath(root_dir)
     audio_files = glob.glob(f"{root_dir}/**/*{extension}", recursive=True)
-    audio_files = [file for file in audio_files if file not in ignore]
+    
+    ignore = set(ignore)
+    audio_files = (file for file in audio_files if file not in ignore)
     return audio_files
         
 

From 617917c27d75500a7614120119d64168305c7623 Mon Sep 17 00:00:00 2001
From: TQ Zhang <tianqiz04@gmail.com>
Date: Mon, 13 Jan 2025 11:33:30 -0800
Subject: [PATCH 4/5] fix: make get_files platform-agnostic with pathlib; feat:
 allow entire directories to be ignored

---
 PyHa/IsoAutio.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index 6535fa3..2b3842e 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -10,7 +10,7 @@
 from .template_matching.utils import filter, butter_bandpass, generate_specgram, template_matching_local_score_arr
 
 import os
-import glob
+from pathlib import Path
 import torch
 import librosa
 import pandas as pd
@@ -161,17 +161,31 @@ def get_files(root_dir, ignore=[], extension=".wav"):
     given root directory for files to process.
 
     Args:
-        root_dir (str): path to the root directory
-        ignore (list/iterable): paths to files to ignore, e.g. a template
-        extension (str): file extension of files to look for, e.g. ".wav"
+        root_dir (str): path-like to the root directory from which to draw the files
+        ignore (list/iterable): paths to files or directories to ignore/skip
+        extension (str): file extension of files to look for
     Returns:
-        generator returning absolute paths to all files with given extension.
+        generator yielding pathlib.Path objects for all files with given extension.
     """
+    # initialize path
     root_dir = os.path.abspath(root_dir)
-    audio_files = glob.glob(f"{root_dir}/**/*{extension}", recursive=True)
+    root_dir = Path(root_dir)
+    # get all files
+    audio_files = root_dir.rglob(f"*{extension}")
     
-    ignore = set(ignore)
-    audio_files = (file for file in audio_files if file not in ignore)
+    # build the set of file and directory paths to skip
+    ignored_files = set()
+    for root_to_ignore in ignore:
+        # initialize path
+        root_to_ignore = os.path.abspath(root_to_ignore)
+        root_to_ignore = Path(root_to_ignore)
+        # search for files
+        ignored_under_root = set(root_to_ignore.rglob(f"*{extension}"))
+        ignored_under_root.add(root_to_ignore)
+        # update running set
+        ignored_files.update(ignored_under_root)
+    
+    audio_files = (file for file in audio_files if file not in ignored_files)
     return audio_files
         
 
@@ -236,6 +250,9 @@ def isolate(
     # single clip
     isolation_df = pd.DataFrame()
 
+    if not os.path.isdir(audio_dir):
+        audio_dir = os.path.split(audio_dir)[0]
+    
     # deciding which isolation technique to deploy for a given clip based on
     # the technique isolation parameter
     if isolation_parameters["technique"] == "simple":

From 84fc0b04c7e166ff51ce490b7aa2c23e201d506f Mon Sep 17 00:00:00 2001
From: TQ Zhang <tianqiz04@gmail.com>
Date: Mon, 13 Jan 2025 12:08:04 -0800
Subject: [PATCH 5/5] fix!: update isolation to save most specific subfolder

Changed isolate() to take the filepath and split it to keep the FOLDER column consistent.
---
 PyHa/IsoAutio.py | 67 ++++++++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index 2b3842e..36905af 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -193,8 +193,7 @@ def isolate(
         local_scores,
         SIGNAL,
         SAMPLE_RATE,
-        audio_dir,
-        filename,
+        filepath,
         isolation_parameters,
         manual_id="bird",
         normalize_local_scores=False):
@@ -214,11 +213,8 @@ def isolate(
         SAMPLE_RATE (int)
             - Sampling rate of the audio clip, usually 44100.
 
-        audio_dir (string)
-            - Directory of the audio clip.
-
-        filename (string)
-            - Name of the audio clip file.
+        filepath (string)
+            - path to the audio clip file.
 
         isolation_parameters (dict)
             - Python Dictionary that controls the various label creation
@@ -233,8 +229,9 @@ def isolate(
     assert isinstance(SIGNAL,np.ndarray)
     assert isinstance(SAMPLE_RATE,int)
     assert SAMPLE_RATE > 0
-    assert isinstance(audio_dir,str)
-    assert isinstance(filename,str)
+    # assert isinstance(audio_dir,str)
+    # assert isinstance(filename,str)
+    # assert isinstance(filepath,str)
     assert isinstance(isolation_parameters,dict)
     assert isinstance(manual_id,str)
     assert isinstance(normalize_local_scores,bool)
@@ -250,9 +247,10 @@ def isolate(
     # single clip
     isolation_df = pd.DataFrame()
 
-    if not os.path.isdir(audio_dir):
-        audio_dir = os.path.split(audio_dir)[0]
-    
+    # split filepath
+    filepath = os.path.abspath(filepath)
+    audio_dir, filename = os.path.split(filepath)    
+
     # deciding which isolation technique to deploy for a given clip based on
     # the technique isolation parameter
     if isolation_parameters["technique"] == "simple":
@@ -1065,13 +1063,12 @@ def generate_automated_labels_microfaune(
         try:
             # Running moment to moment algorithm and appending to a master
             # dataframe.
-            filename = os.path.split(audio_file)[-1]
+            # filename = os.path.split(audio_file)[-1]
             new_entry = isolate(
                 local_scores=local_scores[0],
                 SIGNAL=SIGNAL,
                 SAMPLE_RATE=SAMPLE_RATE,
-                audio_dir=audio_dir,
-                filename=filename,
+                filepath=audio_file,
                 isolation_parameters=isolation_parameters,
                 manual_id=manual_id,
                 normalize_local_scores=normalize_local_scores)
@@ -1206,22 +1203,22 @@ def generate_automated_labels_tweetynet(
         try:
             # Running moment to moment algorithm and appending to a master
             # dataframe. 
-            filename = os.path.split(audio_file)[-1]
             if isolation_parameters["tweety_output"]:
+                filepath = os.path.abspath(audio_file)
+                directory, filename = os.path.split(filepath)
                 new_entry = predictions_to_kaleidoscope(
-                    predictions, 
-                    SIGNAL, 
-                    audio_dir, 
-                    filename, 
-                    manual_id, 
-                    SAMPLE_RATE)
+                    predictions=predictions, 
+                    SIGNAL=SIGNAL, 
+                    audio_dir=directory, 
+                    audio_file=filename, 
+                    manual_id=manual_id, 
+                    sample_rate=SAMPLE_RATE)
             else:
                 new_entry = isolate(
                     local_scores=local_scores[0],
                     SIGNAL=SIGNAL,
                     SAMPLE_RATE=SAMPLE_RATE,
-                    audio_dir=audio_dir,
-                    filename=filename,
+                    filepath=audio_file,
                     isolation_parameters=isolation_parameters,
                     manual_id=manual_id,
                     normalize_local_scores=normalize_local_scores)
@@ -1337,12 +1334,11 @@ def generate_automated_labels_FG_BG_separation(
         try:
             filename = os.path.split(audio_file)[-1]
             new_entry = isolate(
-                local_score_arr,
-                SIGNAL,
-                SAMPLE_RATE,
-                audio_dir,
-                filename,
-                isolation_parameters,
+                local_scores=local_score_arr,
+                SIGNAL=SIGNAL,
+                SAMPLE_RATE=SAMPLE_RATE,
+                filepath=audio_file,
+                isolation_parameters=isolation_parameters,
                 manual_id=manual_id,
             )
             if annotations.empty:
@@ -1466,12 +1462,11 @@ def generate_automated_labels_template_matching(
         try:
             filename = os.path.split(audio_file)[-1]
             new_entry = isolate(
-                local_score_arr,
-                SIGNAL,
-                SAMPLE_RATE,
-                audio_dir,
-                filename,
-                isolation_parameters,
+                local_scores=local_score_arr,
+                SIGNAL=SIGNAL,
+                SAMPLE_RATE=SAMPLE_RATE,
+                filepath=audio_file,
+                isolation_parameters=isolation_parameters,
                 manual_id=manual_id,
             )
             if annotations.empty: