From e50f8a587d31e747019eac5070901c2ecf55bb75 Mon Sep 17 00:00:00 2001 From: naglepuff Date: Fri, 20 Mar 2026 11:17:44 -0400 Subject: [PATCH] Add offset to CSV-based ingest --- .../management/commands/load_public_dataset.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/bats_ai/core/management/commands/load_public_dataset.py b/bats_ai/core/management/commands/load_public_dataset.py index d40b1cdc..7b9ee72f 100644 --- a/bats_ai/core/management/commands/load_public_dataset.py +++ b/bats_ai/core/management/commands/load_public_dataset.py @@ -113,17 +113,23 @@ def _ingest_files_from_manifest( owner: User, public: bool, limit: int | None, + offset: int | None, file_key: str = "file_key", tag_keys: list[str] | None = None, ): if tag_keys is None: tag_keys = [] + if offset is None: + offset = 0 + iterations = 0 with open(manifest) as manifest_file: reader = DictReader(manifest_file) - for line in reader: + for idx, line in enumerate(reader): + if idx < offset: + continue if limit and iterations >= limit: return iterations += 1 @@ -223,6 +229,9 @@ def add_arguments(self, parser): type=int, help="Limit the number of WAV files to be imported", ) + parser.add_argument( + "--offset", type=int, help="Begin ingest from the specified position in the manifest" + ) parser.add_argument( "--filekey", type=str, @@ -265,7 +274,10 @@ def handle(self, *args, **options): public = options.get("public", False) limit = options.get("limit") + offset = options.get("offset") file_key = options.get("filekey", "file_key") + if offset: + self.stdout.write(f"Skipping the first {offset} row(s)...") if limit: self.stdout.write(f"Ingesting the first {limit} files from {manifest}...") _ingest_files_from_manifest( @@ -275,6 +287,7 @@ def handle(self, *args, **options): owner=owner, public=public, limit=limit, + offset=offset, file_key=file_key, tag_keys=tag_keys, )